Page 1 of 1

Floppy driver headache

Posted: Thu Nov 25, 2010 4:00 pm
by davidv1992
I'm currently coding my floppy driver, the first real device driver I'm building, and I have the problem that, although it works under bochs, it fails to reconfigure the controller under real hardware conditions. The recalibrate command fails to execute correctly and results in the MSR having a status of 0x90 (ie cmd busy, expecting input). I've been trying to debug it since this morning (its 10:57 PM now) but so far my conclusion is that something is going wrong so horribly that my debug code is interfering with reproducing results.

Here is the code i use:

Code: Select all

#include <util.h>
#include <cmos.h>
#include <display.h>
#include <irq.h>
#include <idt.h>
#include <sched.h>
#include <proc.h>
#include <kcall_internal.h>

#define SRA			0x3F0
#define SRB			0x3F1
#define DOR			0x3F2
#define TDR			0x3F3
#define MSR			0x3F4
#define DSR			0x3F4
#define DATA_FIFO	0x3F5
#define DIR			0x3F7
#define CCR			0x3F7

unsigned int FTID;
unsigned int irq_waiting;
unsigned int timer_id=0;
unsigned long long timer_time=0;

void irq6()
{
	irq_waiting = 1;
	resume(FTID);
	print("IRQ6\n");
}

void wait_timeout(int msec)
{
	unsigned int ms, ls;
	unsigned long long intime;
	
	asm volatile("mov $0x32, %%eax\n"
				 "int $0x70" : "=a"(ms), "=b"(ls) : );
	
	intime = (((long long)ms)<<32) | ((long long)ls);
	intime += msec;
	
	ms = (timer_time >> 32) & 0xFFFFFFFF;
	ls = (timer_time) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : : "a"(KC_TIMER_DESTROY), "b"(timer_id), "c"(ms), "d"(ls) );
	
	timer_time = intime;
	
	ms = (intime >> 32) & 0xFFFFFFFF;
	ls = (intime) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : "=a"(timer_id) : "a"(KC_TIMER_CREATE), "b"(FTID), "c"(ms), "d"(ls) );
}

void controller_reset()
{
	unsigned char dor_orig;
	unsigned char PIT_COUNT;
	
	asm("cli");
	
	dor_orig = inb(DOR);
	outb(DOR, dor_orig & ~4);
	
	PIT_COUNT = inb(0x40);
	inb(0x40);
	
	if (PIT_COUNT + 10 < PIT_COUNT)
	{
		while (((signed char)inb(0x40)) < ((signed char)PIT_COUNT + 10)) inb(0x40);
	}
	else
	{
		while (inb(0x40) < PIT_COUNT+10) inb(0x40);
	}
	
	inb(0x40);
	
	outb(DOR, dor_orig);
	
	wait_timeout(3000);
	
	asm("sti");
}

int controller_command(unsigned char command, unsigned char *inbytes, 
			unsigned int inlen, unsigned char *outbytes, unsigned int outlen,
			unsigned int wait_irq)
{
	int i;
	
	if (wait_irq)
		irq_waiting = 0;
	
	for (i=0; i<16 && (inb(MSR) & 0xd0) != 0x80; i++)
		controller_reset();
	if ((inb(MSR) & 0xd0) != 0x80)
		return 1;
	
	outb(DATA_FIFO, command);
	
	while (inlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 2;
		
		if ((inb(MSR) & 0x50) != 0x10)
			return 3;
		
		outb(DATA_FIFO, *inbytes);
		inbytes++;
		inlen--;
	}
	
	if ((inb(MSR) & 0x20) == 0x20)
		PANIC("NDMA Set, no support for execution phase (yet).");
	
	if (wait_irq)
	{
		asm("cli");
		
		if (irq_waiting == 0)
			wait_timeout(wait_irq);
		
		if (idt_interruptable())
			asm("sti");
	}
	
	for (i=0; i<2048 && (inb(MSR) & 0x80) != 0x80; i++);
	
	if ((inb(MSR) & 0x80) != 0x80)
		return 4;
	
	while (outlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 5;
		
		if ((inb(MSR) & 0x50) != 0x50)
			return 6;
		
		*outbytes = inb(DATA_FIFO);
		outbytes++;
		outlen--;
	}
	
	for (i=0; i<1024  && (inb(MSR) & 0x80) != 0x80; i++) wait_timeout(100);
	
	if ((inb(MSR) & 0xD0) != 0x80)
		return inb(MSR);
	
	return 0;
}

void floppy_controller_init()
{
	unsigned char floppy_res;
	unsigned char floppy_in[3];
	int i;
	char succes;
	
	if ((succes = controller_command(0x10, (unsigned char *)0, 0, &floppy_res, 1,0)))
	{
		print_num(succes);
		print("\n");
		PANIC("Unable to read out floppy state.");
	}
	
	if (floppy_res != 0x90)
		PANIC("Unsuported floppy controller.");
	
	outb(DOR, inb(DOR) | 0x08);
	
	floppy_in[0] = 0;
	floppy_in[1] = 0x47;
	floppy_in[2] = 0;
	
	if ((succes = controller_command(0x13, floppy_in, 3, (unsigned char *)0, 0,0)))
	{
		print_num(succes);
		print("\n");
		PANIC("Unable to reconfigure controller.");
	}
	
	if ((succes = controller_command(0x94, (unsigned char *)0, 0, floppy_in, 1,0)))
	{
		print_num(succes);
		print("\n");
		PANIC("Unable to lock config.");
	}
	
	if ((floppy_in[0] & 0x10) != 0x10)
		PANIC("Unable to lock config.");
	
	controller_reset();
	
	outb(DOR, (inb(DOR) & ~0x3) | 0x10);
	
	wait_timeout(500);
	
	i=0;
	
	do
	{
		floppy_in[0] = 0;
		print("Start\n");
		succes = controller_command(0x7, floppy_in, 1, (unsigned char*)0, 0, 10000);
		print("End\n");
		if (succes == 0)
		{
			succes = controller_command(0x8, (unsigned char*)0, 0, floppy_in, 2, 0);
			if ((floppy_in[0] & 0x20) != 0x20)
				succes = 1;
		}
		else
		{
			print_num(succes);
			print("\n");
		}
		i++;
	}
	while (i<4 && succes != 0);
	
	if (succes != 0)
		PANIC("Unable to recalibrate drive 0.");
	
	outb(DOR, (inb(DOR) & ~0x10));
	
	print_hex(cmos_read_byte(0x10));
	print("\nFinished.\n");
}

void floppy_task()
{
	irq_set_handle(6, irq6);	
	
	floppy_controller_init();
	
	while (1);
}

void floppy_init(int KPID)
{
	FTID = process_spawn_task(KPID, (unsigned int)floppy_task);
}
The problem is probably obvious to someone with a more intimate knowledge of the floppy hardware, but it has eluded me so far.

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 12:04 am
by Brendan
Hi,

It looks mostly dodgy to me. You can't configure the controller and then reset it (because that discards most of the changes you just made). You also probably shouldn't assume that the controller wasn't left in the "reset state" by whatever used it before your OS started (and therefore you can't do a "version" command before resetting the controller). The first thing "floppy_controller_init()" should do is call "controller_reset()".

The reset seems messed up too. It needs to reset the controller (set the reset flag in the DOR then clear it - you do this already), then issue 4 "sense interrupt status" commands (to clear interrupt flags), then a "configure" command (to disable polling), then a "specify" command (to set initial timings, or to restore previously detected timings).

After reset, "floppy_controller_init()" can do the "version" command and see if the controller supports a FIFO or not; and if the controller does you can clear the LOCK bit (in case someone left it set), then configure the FIFO, then set the LOCK bit. If "version" says there's no FIFO you can skip this and everything else works the same, so there's no need to do a "unsupported floppy controller" message and give up.

For each (potential) floppy drive you should keep track of the drive's type, the drive's state and the type of media in the drive (if any). Initially all of this would be set to "unknown" by "floppy_controller_init()".

The next/last step is to determine which disk drive/s are present. I think you can turn their motors on, then examine the results of a "recalibrate" command (and the following "sense interrupt status" command). I can't remember if a "not present" drive causes a time-out or an abnormal termination. In any case, for a "present" drive you have to do "recalibrate" before doing much else (read/write/seek) anyway.

That's about all you can do during initialisation.

The places where you wait for an IRQ look dodgy to me too. If/when the IRQ occurs you should stop waiting immediately, but your code looks like it waits for the entire time-out (and only then checks to see if the IRQ occurred while you were waiting). I'd assume that either your scheduler is incomplete or your IPC is incomplete (or both). You should be able to tell the scheduler "don't give me any CPU time until this delay has expired or the IRQ handler does something to cancel the delay". Otherwise you end up with slow drivers (waiting much longer than necessary) or wasting CPU time polling.


Cheers,

Brendan

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 12:32 am
by davidv1992
Brendan wrote:Hi,

It looks mostly dodgy to me. You can't configure the controller and then reset it (because that discards most of the changes you just made). You also probably shouldn't assume that the controller wasn't left in the "reset state" by whatever used it before your OS started (and therefore you can't do a "version" command before resetting the controller). The first thing "floppy_controller_init()" should do is call "controller_reset()".
Ok, so I need to reset controller before doing anything else.
Brendan wrote: The reset seems messed up too. It needs to reset the controller (set the reset flag in the DOR then clear it - you do this already), then issue 4 "sense interrupt status" commands (to clear interrupt flags), then a "configure" command (to disable polling), then a "specify" command (to set initial timings, or to restore previously detected timings).
My configure turns polling mode off, and after that it immediately does a lock on the configure. Shouldn't that lock prevent the configure from being necessary and also prevent the sense interrupts from being necessary?
Brendan wrote: After reset, "floppy_controller_init()" can do the "version" command and see if the controller supports a FIFO or not; and if the controller does you can clear the LOCK bit (in case someone left it set), then configure the FIFO, then set the LOCK bit. If "version" says there's no FIFO you can skip this and everything else works the same, so there's no need to do a "unsupported floppy controller" message and give up.
Good to know, however for the moment I'm going to try to keep things simple, especially with some bug already around.
Brendan wrote: For each (potential) floppy drive you should keep track of the drive's type, the drive's state and the type of media in the drive (if any). Initially all of this would be set to "unknown" by "floppy_controller_init()".

The next/last step is to determine which disk drive/s are present. I think you can turn their motors on, then examine the results of a "recalibrate" command (and the following "sense interrupt status" command). I can't remember if a "not present" drive causes a time-out or an abnormal termination. In any case, for a "present" drive you have to do "recalibrate" before doing much else (read/write/seek) anyway.

That's about all you can do during initialisation.
Ok forgot to mention, this is a partial driver and it depends on me guaranteeing that first drive is present and 1.44MB 3.5" with a floppy in the drive (and I know that is true for both bochs and the testbed system). It also does nothing with specify yet and that's why it aint handled that yet in the reset.
Brendan wrote: The places where you wait for an IRQ look dodgy to me too. If/when the IRQ occurs you should stop waiting immediately, but your code looks like it waits for the entire time-out (and only then checks to see if the IRQ occurred while you were waiting). I'd assume that either your scheduler is incomplete or your IPC is incomplete (or both). You should be able to tell the scheduler "don't give me any CPU time until this delay has expired or the IRQ handler does something to cancel the delay". Otherwise you end up with slow drivers (waiting much longer than necessary) or wasting CPU time polling.
The way I handle the IRQ is indeed not straightforward, what I do is I call my timer driver, which changes task status to blocked and queues it for a resume after the time intervall, the IRQ handler just resumes the floppy task every time it gets an IRQ.

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 9:28 am
by Brendan
Hi,
davidv1992 wrote:
Brendan wrote: The reset seems messed up too. It needs to reset the controller (set the reset flag in the DOR then clear it - you do this already), then issue 4 "sense interrupt status" commands (to clear interrupt flags), then a "configure" command (to disable polling), then a "specify" command (to set initial timings, or to restore previously detected timings).
My configure turns polling mode off, and after that it immediately does a lock on the configure. Shouldn't that lock prevent the configure from being necessary and also prevent the sense interrupts from being necessary?
In general, there's 2 different types of interrupt flags - "interrupt enable/disable" flags, and also "interrupt pending" flags. The reset causes interrupts to be disabled, but doesn't clear the interrupt pending flags. The "sense interrupt status" commands clear the interrupt pending flags. If this isn't done, then after you enable interrupts (with the "configure" command) a drive may be unable to generate an interrupt because it's interrupt pending flag can't change state (e.g. the interrupt pending flag goes from "set" to "set", which doesn't count as a new interrupt).
davidv1992 wrote:
Brendan wrote: After reset, "floppy_controller_init()" can do the "version" command and see if the controller supports a FIFO or not; and if the controller does you can clear the LOCK bit (in case someone left it set), then configure the FIFO, then set the LOCK bit. If "version" says there's no FIFO you can skip this and everything else works the same, so there's no need to do a "unsupported floppy controller" message and give up.
Good to know, however for the moment I'm going to try to keep things simple, especially with some bug already around.
For simple, don't bother with the "version" command and don't worry about configuring the FIFO.. ;)
davidv1992 wrote:
Brendan wrote: The places where you wait for an IRQ look dodgy to me too. If/when the IRQ occurs you should stop waiting immediately, but your code looks like it waits for the entire time-out (and only then checks to see if the IRQ occurred while you were waiting). I'd assume that either your scheduler is incomplete or your IPC is incomplete (or both). You should be able to tell the scheduler "don't give me any CPU time until this delay has expired or the IRQ handler does something to cancel the delay". Otherwise you end up with slow drivers (waiting much longer than necessary) or wasting CPU time polling.
The way I handle the IRQ is indeed not straightforward, what I do is I call my timer driver, which changes task status to blocked and queues it for a resume after the time intervall, the IRQ handler just resumes the floppy task every time it gets an IRQ.
Doh - sorry (I didn't notice the "resume(FTID);" for some reason). That should work. :)


Cheers,

Brendan

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 10:40 am
by davidv1992
It ain't the nicest way of solving it, but it resolved most of my problems with race conditions so, better hacky and correct than beautifull but non-functional.

However, in the wiki it says that # If (and only if) drive polling mode is turned on, send 4 Sense Interrupt commands (required). I turn PIO off in my configure and then turn on the lock bit to make sure I never have to do that again, shouldn't that work?

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 10:53 am
by Brendan
Hi,
davidv1992 wrote:However, in the wiki it says that # If (and only if) drive polling mode is turned on, send 4 Sense Interrupt commands (required). I turn PIO off in my configure and then turn on the lock bit to make sure I never have to do that again, shouldn't that work?
No. The lock bit only effects FIFO configuration, so reset still resets the interrupt enable/disable and leaves you in polling mode.

It is possible to issue a "configure" within some time window, so that interrupts are enabled quickly enough after reset to prevent the need for the "sense interrupt status" commands; but that's racey, especially if your driver is running as a task that can be effected by other IRQs/tasks (e.g. you might send the "configure" immediately, but sometimes the scheduler might preeempt at the exact wrong time causing your "configure" to be too late).


Cheers,

Brendan

Re: Floppy driver headache

Posted: Fri Nov 26, 2010 1:39 pm
by davidv1992
Reimplemented a large part to brendan's advice, still it fails on reconfigure command on real hardware, again bochs is 100% fine with it, and I can't see why it would want more data for it (it is stuck on 0x90 in MSR). Furthermore my system fails to generate any sort of activity for any second reset, and so does bochs (No IRQ, no message of having to much input).

Code: Select all

#include <util.h>
#include <cmos.h>
#include <display.h>
#include <irq.h>
#include <idt.h>
#include <sched.h>
#include <proc.h>
#include <kcall_internal.h>

#define SRA			0x3F0
#define SRB			0x3F1
#define DOR			0x3F2
#define TDR			0x3F3
#define MSR			0x3F4
#define DSR			0x3F4
#define DATA_FIFO	0x3F5
#define DIR			0x3F7
#define CCR			0x3F7

#define SENSE_INTERRUPT		0x08
#define CONFIGURE			0x13
#define VERSION				0x10

unsigned int FTID;
unsigned int irq_waiting;
unsigned int timer_id=0;
unsigned long long timer_time=0;

void irq6()
{
	irq_waiting = 1;
	resume(FTID);
	print("IRQ6\n");
}

void wait_timeout(int msec)
{
	unsigned int ms, ls;
	unsigned long long intime;
	
	asm volatile("mov $0x32, %%eax\n"
				 "int $0x70" : "=a"(ms), "=b"(ls) : );
	
	intime = (((long long)ms)<<32) | ((long long)ls);
	intime += msec;
	
	ms = (timer_time >> 32) & 0xFFFFFFFF;
	ls = (timer_time) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : : "a"(KC_TIMER_DESTROY), "b"(timer_id), "c"(ms), "d"(ls) );
	
	timer_time = intime;
	
	ms = (intime >> 32) & 0xFFFFFFFF;
	ls = (intime) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : "=a"(timer_id) : "a"(KC_TIMER_CREATE), "b"(FTID), "c"(ms), "d"(ls) );
}

unsigned int controller_command(unsigned char command, unsigned char *inbytes, 
			unsigned int inlen, unsigned char *outbytes, unsigned int outlen,
			unsigned int wait_irq);

void controller_reset(unsigned int tparam)
{
	unsigned char dor_orig;
	unsigned char PIT_COUNT;
	unsigned char floppy_io[10];
	static unsigned int in_reset = 0;
	unsigned int succes;
	int i;
	
	if (in_reset)
		return;
	
	in_reset = 1;
	
	asm("cli");
	
	dor_orig = inb(DOR);
	outb(DOR, dor_orig & ~4);
	
	PIT_COUNT = inb(0x40);
	inb(0x40);
	
	if (PIT_COUNT + 10 < PIT_COUNT)
	{
		while (((signed char)inb(0x40)) < ((signed char)PIT_COUNT + 10)) inb(0x40);
	}
	else
	{
		while (inb(0x40) < PIT_COUNT+10) inb(0x40);
	}
	
	inb(0x40);
	
	outb(DOR, dor_orig);
	
	wait_timeout(3000);
	
	asm("sti");
	
	for (i=0; i<4; i++)
	{
		if (controller_command(SENSE_INTERRUPT, (unsigned char*)0, 0, floppy_io, 2, 0) != 0 || (floppy_io[0] & 0xFC) != 0xC0)
			PANIC("Failed to do sense after reset.\n");
	}
	
	outb(DOR, inb(DOR) | 8);
	
	if (controller_command(VERSION, (unsigned char*)0, 0, floppy_io, 1, 0) != 0 || floppy_io[0] != 0x90)
		PANIC("Unsuported floppy controller.\n");
	
	floppy_io[0] = floppy_io[2] = 0;
	floppy_io[1] = 0x47;
	
	for (i=3; i<10; i++)
		floppy_io[i]=0;
	
	if ((succes = controller_command(CONFIGURE, floppy_io, tparam, (unsigned char *)0, 0, 0)) != 0)
	{
		print_hex(succes);
		print("\n");
		print("Unable to reconfigure controller.\n");
		//PANIC("Unable to reconfigure controller.\n");
	}
}

unsigned int controller_command(unsigned char command, unsigned char *inbytes, 
			unsigned int inlen, unsigned char *outbytes, unsigned int outlen,
			unsigned int wait_irq)
{
	int i;
	
	if (wait_irq)
		irq_waiting = 0;
	
	for (i=0; i<16 && (inb(MSR) & 0xd0) != 0x80; i++)
		controller_reset(3);
	if ((inb(MSR) & 0xd0) != 0x80)
		return 1;
	
	outb(DATA_FIFO, command);
	
	while (inlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 2;
		
		if ((inb(MSR) & 0x50) != 0x10)
			return 3;
		
		outb(DATA_FIFO, *inbytes);
		inbytes++;
		inlen--;
	}
	
	if ((inb(MSR) & 0x20) == 0x20)
		PANIC("NDMA Set, no support for execution phase (yet).");
	
	if (wait_irq)
	{
		asm("cli");
		
		if (irq_waiting == 0)
			wait_timeout(wait_irq);
		
		if (idt_interruptable())
			asm("sti");
	}
	
	for (i=0; i<2048 && (inb(MSR) & 0x80) != 0x80; i++);
	
	if ((inb(MSR) & 0x80) != 0x80)
		return 4;
	
	while (outlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 5;
		
		if ((inb(MSR) & 0x50) != 0x50)
			return 6;
		
		*outbytes = inb(DATA_FIFO);
		outbytes++;
		outlen--;
	}
	
	for (i=0; i<1024  && (inb(MSR) & 0x80) != 0x80; i++) wait_timeout(100);
	
	if ((inb(MSR) & 0xD0) != 0x80)
		return inb(MSR);
	
	return 0;
}

void floppy_controller_init()
{
	/*unsigned char floppy_res;
	unsigned char floppy_in[3];
	*/int i;/*
	char succes;*/
	for (i=3; i<10; i++)
	{
		print("Try ");
		print_num(i);
		print("\n");
		controller_reset(i);
		wait_timeout(1000);
	}
}

void floppy_task()
{
	irq_set_handle(6, irq6);	
	
	floppy_controller_init();
	
	while (1);
}

void floppy_init(int KPID)
{
	FTID = process_spawn_task(KPID, (unsigned int)floppy_task);
}

basically changed floppy_reset to do all of the init steps on every reset (with a detection to prevent an endless chain of resets in case something goes awry).

Re: Floppy driver headache

Posted: Sat Nov 27, 2010 1:15 pm
by davidv1992
Ok wrote quite a bit of code to examine each write to a floppy controller port and the status byte just before that write. and also code doing same for reads from FIFO_DATA. I compared the output for both bochs and real hardware and I find the following descrepancies:

Code: Select all

//before this point it is all equal, what you'd expect from code before the configure command
BOCHS								REAL WORLD
0x00000080							0x00000080
W 0x000003F5 0x00000013			W 0x000003F5 0x00000013
0x00000090							0x00000090
W 0x000003F5 0x00000000			W 0x000003F5 0x00000000
0x00000090							0x00000090
W 0x000003F5 0x00000047			W 0x000003F5 0x00000047
0x00000090							0x00000090
W 0x000003F5 0x00000000			W 0x000003F5 0x00000000
0x00000080							0x00000090              // Discrepency
										// documentation tells me to expect
										// 0x00000080
The lines without a prefix are current values of the MSR register, the W lines describe writes to IO ports (data is bytes, port word, function for printing hex is hardcoded to dw). (W port data)

Has anyone seen this behaviour before on real hardware??

Version command gives back 0x90, so no weird controller either.

EDIT:
Added some code to force a version command to be done directly after the configure and it just executes that nicely and after that is has 0x80 on MSR. Is there a reason why it wont go back to 0x80 on MSR after a configure or is it basically that you can't trust the MSR cmd_busy bit?

Re: Floppy driver headache

Posted: Sun Nov 28, 2010 5:28 am
by Brendan
Hi,

I'm still not sure what the exact problem is.
davidv1992 wrote:Is there a reason why it wont go back to 0x80 on MSR after a configure or is it basically that you can't trust the MSR cmd_busy bit?
Is there some reason you can trust code that does this:

Code: Select all

for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
How long does it poll MSR before giving up (and is it long enough? - how can you know?), and why does it ignore the RQM flag if it takes too long (instead of handling it as a timeout and refusing to continue, or even doing a "puts("ERROR: Time-out while waiting from RQM to become set\n");)?

Also, my datasheets say:
"Before writing the host must examine the RQM and DIO bits of the Main Status Register. RQM, DIO must be equal to ‘‘1’’ and ‘‘0’’ respectively before command bytes may be written. RQM is set false after each write cycle until the received byte is processed."

I'd probably suggest something more like:

Code: Select all

int wait_for_ready_to_send(int timeout_length) {
    end_time = get_system_tick() + timeout_length;

    while( (inb(MSR) & 0xC0) != 0x80) {
        if(get_system_tick() > end_time) return TIME_OUT;
    }
    return OK;
}
Where "for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);" is replaced with:

Code: Select all

    if(wait_for_ready_to_send(timeout) != OK) {
        /* Handle timeout and abort initialisation, or try "reset" or something */
    }
    /* Continue with whatever */
Also, I doubt that "static unsigned int in_reset = 0;" qualifies as adequate reentrancy control. At a minimum it should be atomic, and not like this:

Code: Select all

    if (in_reset)
      return;
   
   in_reset = 1;
Otherwise (possibly even for single-CPU, due to interrupts and/or task switching), the "if(in reset)" can be executed multiple times (e.g. by different CPUs, or by different threads, or maybe by a thread and an IRQ handler) before the "in_reset = 1;" part is executed by anything (which results in the controller being reset by multiple things at the same time, which would cause lots of problems). Unfortunately C doesn't support atomic operations on its own, so you'd probably need to write a library to support them, or use some inline assembly (e.g. something based on "lock cmpxchg" or "lock bts" or something).

It's also racey - if threadB calls "controller_reset()" while threadA is already executing it, then for threadB the function will return before threadA has finished doing the reset.

However, that alone isn't the whole problem. The floppy hardware can only do one thing at a time (even when there's 2 separate floppy drives attached to it), therefore the floppy driver can only ask the hardware to do one thing at a time, therefore "controller_reset()" should never be called while it's already in progress to begin with. Basically you need something like a FIFO queue of requests, where any number of requests can be put onto the FIFO queue and the driver takes requests from the queue one at a time (and does whatever it has to to satisfy the request). In this case "in_reset" isn't needed at all (and doesn't need to be atomic), but the code to insert a request onto the queue and remove a request from the queue does need to be atomic.

Of course it could be more complex than a simple FIFO queue - you could (eventually) add prioritised I/O to it, or optimise the order that requests are performed to maximise efficiency (e.g. minimise delays caused by head movement/seeks), or both.


Cheers,

Brendan

Re: Floppy driver headache

Posted: Sun Nov 28, 2010 9:15 am
by davidv1992
I do check DIO and MRQ before every read and write to the FIFO port. The loop that you pointed to as fishy can be qualified as that, it is indeed arbitrary to just do 128 polls and to quit after that (and that has been enough, cause 0x80 is set even on real hardware after that loop). BUT it does quit, the if statement just after that loop is responsible for reporting that error to whoever called it. Also the in_reset isn't meant for reentrancy in a multitask way but if my controller_command function finds the controller in an unexpected state it automatically does a reset and returns an error, and that reset could give me a stack overflow if i don't block it when we already were in a reset procedure, and i rather have that not happening, which is what the code you point at prevents. The problem i have is that on real hardware after the configure command i get a 0x90 instead of 0x80 on MSR.

Re: Floppy driver headache

Posted: Sun Nov 28, 2010 11:04 am
by Brendan
Hi,
davidv1992 wrote:I do check DIO and MRQ before every read and write to the FIFO port. The loop that you pointed to as fishy can be qualified as that, it is indeed arbitrary to just do 128 polls and to quit after that (and that has been enough, cause 0x80 is set even on real hardware after that loop). BUT it does quit, the if statement just after that loop is responsible for reporting that error to whoever called it.
I took another look, and you're right. Ironically, you not just right, you're actually are pounding the daylights out of the MSR in lots of weird and wonderful ways all over the place. For example, looping until RQM is set, then reading MSR again and checking if RQM is set again, then reading MSR a third time and checking DIO and CMD BSY. Is there a way to minimise the number of expensive/slow I/O port reads? Note: for legacy "ISA" hardware like the floppy controller, you can assume it costs roughly 1 us per I/O port access, which is about the same as 4000 normal instructions on a modern CPU.
davidv1992 wrote:Also the in_reset isn't meant for reentrancy in a multitask way but if my controller_command function finds the controller in an unexpected state it automatically does a reset and returns an error, and that reset could give me a stack overflow if i don't block it when we already were in a reset procedure, and i rather have that not happening, which is what the code you point at prevents.
Ah - I see it now - "controller_reset()" calls "controller_command()" which may call "controller_reset()", which may result in an endless loop. Why does "controller_command()" need to call "controller_reset()" up to 16 times (rather than just returning a "timeout" error)?
davidv1992 wrote:The problem i have is that on real hardware after the configure command i get a 0x90 instead of 0x80 on MSR.
I'm starting to wonder if you send the last byte for the configure command, and then test MSR before the hardware has had enough time to clear the CMD BSY flag. As a quick test (to prove this theory wrong), could you do "while( (MSR & 0x10) == 0x10) {}" to see if CMD BSY clears itself eventually?

If that doesn't help, I can't think of anything else, and the only option left would be "slash and burn" refactoring (maybe including things like "#define RQM_FLAG 0x80" instead of magic numbers; and possibly starting with a "send_command_byte()" function that "controller_reset()" calls directly, instead of putting bytes into an array and using a more generic function to send commands).


Cheers,

Brendan

Re: Floppy driver headache

Posted: Sun Nov 28, 2010 1:05 pm
by davidv1992
Brendan wrote: I'm starting to wonder if you send the last byte for the configure command, and then test MSR before the hardware has had enough time to clear the CMD BSY flag. As a quick test (to prove this theory wrong), could you do "while( (MSR & 0x10) == 0x10) {}" to see if CMD BSY clears itself eventually?
Your hypothesis is right, it actually does clear eventually (fast enough for me not to notice by eye). That is one bit weird timing you found there (I for one would have expected that the RQM is the last status bit to be set).

I do realise that for production code I will need to rewrite large chunks, both for readability and efficiency, but this was the quick-and-dirty easy way to do all the checks. I've also by now realised that there is a lot of code out there that is far less rigorous in it's checks.

I also found out that some real world hardware manages to throw an interrupt the moment you clear the reset flag in the DOR, might be usefull to make a note of this in the wiki as it might lead to problems for some ways of implementing a floppy controller reset.

Re: Floppy driver headache

Posted: Sun Nov 28, 2010 11:18 pm
by Brendan
Hi,
davidv1992 wrote:
Brendan wrote: I'm starting to wonder if you send the last byte for the configure command, and then test MSR before the hardware has had enough time to clear the CMD BSY flag. As a quick test (to prove this theory wrong), could you do "while( (MSR & 0x10) == 0x10) {}" to see if CMD BSY clears itself eventually?
Your hypothesis is right, it actually does clear eventually (fast enough for me not to notice by eye). That is one bit weird timing you found there (I for one would have expected that the RQM is the last status bit to be set).
I'm glad I finally got something right. To be honest I was getting annoyed at myself for not being able to see the problem. :)


Cheers,

Brendan

Re: Floppy driver headache

Posted: Mon Nov 29, 2010 6:15 am
by davidv1992
This is getting a wonderfull(sarcastic) first experience writing an actual driver. Managed to get bochs floppy controller to lock up and my real hardware to report success without actually having it (YAY).

I'm trying to read logical sector 0 (cylinder 0 head 0 sector 1) using dma. I have the nagging suspicion the problem is somewhere in me setting up the DMA wrong but so far I ain't seeing it.

Here is the floppy drivers code:

Code: Select all

#include <util.h>
#include <cmos.h>
#include <display.h>
#include <irq.h>
#include <idt.h>
#include <sched.h>
#include <proc.h>
#include <kcall_internal.h>
#include <isadma.h>
#include <memory.h>

#define SRA			0x3F0
#define SRB			0x3F1
#define DOR			0x3F2
#define TDR			0x3F3
#define MSR			0x3F4
#define DSR			0x3F4
#define DATA_FIFO	0x3F5
#define DIR			0x3F7
#define CCR			0x3F7

#define SENSE_INTERRUPT		0x08
#define CONFIGURE			0x13
#define VERSION				0x10
#define RECALIBRATE			0x07
#define SPECIFY				0x03

unsigned int FTID;
unsigned int irq_waiting;
unsigned int timer_id=0;
unsigned long long timer_time=0;

// Controller status
unsigned char motor_on[2];
unsigned char drive_avail[2];
unsigned long long motor_timeout[2];
unsigned char current_drive;

// Buffer
void *dmabuf;

void irq6()
{
	irq_waiting = 1;
	resume(FTID);
	print("IRQ6\n");
}

void wait_timeout(int msec)
{
	unsigned int ms, ls;
	unsigned long long intime;
	
	asm volatile("mov $0x32, %%eax\n"
				 "int $0x70" : "=a"(ms), "=b"(ls) : );
	
	intime = (((unsigned long long)ms)<<32) | ((unsigned long long)ls);
	intime += msec;
	
	ms = (timer_time >> 32) & 0xFFFFFFFF;
	ls = (timer_time) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : : "a"(KC_TIMER_DESTROY), "b"(timer_id), "c"(ms), "d"(ls) );
	
	timer_time = intime;
	
	ms = (intime >> 32) & 0xFFFFFFFF;
	ls = (intime) & 0xFFFFFFFF;
	
	asm volatile("int $0x70" : "=a"(timer_id) : "a"(KC_TIMER_CREATE), "b"(FTID), "c"(ms), "d"(ls) );
}

unsigned int controller_command(unsigned char command, unsigned char *inbytes, 
			unsigned int inlen, unsigned char *outbytes, unsigned int outlen,
			unsigned int wait_irq);

void cont_change_drive(unsigned char drive)
{
	if (drive > 1)
		PANIC("No such drive.");
	
	current_drive = drive;
	outb(DOR, (inb(DOR) & ~3) | drive);
}

void cont_motor_on(unsigned char drive)
{
	if (drive > 1)
		PANIC("No such drive.");
	
	outb(DOR, inb(DOR) | (1 << (drive+4)));
	motor_on[drive] = 1;
	
	wait_timeout(300);
}

void update_motor_timeout(unsigned char drive)
{
	unsigned int ls, ms;
	unsigned long long curtime;
	
	if (drive > 1)
		PANIC("No such drive.");
	
	asm("int $0x70" : "=a"(ms), "=b"(ls) : "a"(KC_TIMER_GET_TIME));
	
	curtime = (((unsigned long long)ms) << 32) | ((unsigned long long)ls);
	
	curtime += 2000;
	
	motor_timeout[drive] = curtime;
}

void cont_motor_off(unsigned char drive)
{
	if (drive > 1)
		PANIC("No such drive.");
	
	outb(DOR, inb(DOR) & ~(1 << (drive+4)));
	motor_on[drive] = 0;
}
			
void drive_recalibrate(unsigned char drive)
{
	int i;
	unsigned char floppy_io[2];
	
	if (drive > 1)
		PANIC("No such drive.");
	if (drive_avail[drive] == 0)
		PANIC("No such drive.");
	
	for (i=0; i<4; i++)
	{
		if (current_drive != drive)
		{
			cont_change_drive(drive);
		}

		if (motor_on[drive] == 0)
			cont_motor_on(drive);

		floppy_io[0] = drive;
		if (controller_command(RECALIBRATE, floppy_io, 1, (unsigned char*)0, 0, 4000) != 0)
			continue;
		
		if (controller_command(SENSE_INTERRUPT, (unsigned char*)0, 0, floppy_io, 2, 0) != 0)
			continue;
		
		if ((floppy_io[0] & 0x20) == 0x20)
			break;
	}
	
	if (i == 4)
		PANIC("Couldn't recalibrate drive.");
	
	update_motor_timeout(drive);
}

void controller_reset()
{
	unsigned char floppy_io[10];
	static unsigned int in_reset = 0;
	unsigned int succes;
	int i;
	
	// Stop recursive reset
	if (in_reset)
		return;
	in_reset = 1;
	
	// Reset controller and wait for the IRQ
	asm("cli");
	
	outb(DOR, 0);	
	outb(DOR, 0x0C);
	
	wait_timeout(3000);
	
	asm("sti");
	
	// Update status variables to reflect new situation.
	motor_on[0] = motor_on[1] = 0;
	motor_timeout[0] = motor_timeout[1] = 0;
	current_drive = 0;
	
	// Four sense interrupts to clear interrupt flags
	for (i=0; i<4; i++)
	{
		if (controller_command(SENSE_INTERRUPT, (unsigned char*)0, 0, floppy_io, 2, 0) != 0 || (floppy_io[0] & 0xFC) != 0xC0)
			PANIC("Failed to do sense after reset.\n");
	}
	
	// Verify version of the controller
	if (controller_command(VERSION, (unsigned char*)0, 0, floppy_io, 1, 0) != 0 || floppy_io[0] != 0x90)
		PANIC("Unsuported floppy controller.\n");
	
	// And configure it for our needs.
	floppy_io[0] = floppy_io[2] = 0;
	floppy_io[1] = 0x47;
	floppy_io[3] = VERSION;
	if ((succes = controller_command(CONFIGURE, floppy_io, 3, floppy_io, 0, 0)) != 0)
	{
		PANIC("Unable to reconfigure controller.\n");
	}
	
	// And do a specify
	floppy_io[0] = 0x80;
	floppy_io[1] = 0x1E;
	if ((succes = controller_command(SPECIFY, floppy_io, 2, (unsigned char*)0, 0, 0)) != 0)
	{
		PANIC("Unable to specify.");
	}
	
	// We're no longer in the reset procedure.
	in_reset = 0;
}

unsigned int controller_command(unsigned char command, unsigned char *inbytes, 
			unsigned int inlen, unsigned char *outbytes, unsigned int outlen,
			unsigned int wait_irq)
{
	int i;
	
	// If we are going to wait for an irq make sure flag is clear to start with
	if (wait_irq)
		irq_waiting = 0;
	
	// Check state of the controller
	if ((inb(MSR) & 0xd0) != 0x80)
	{
		controller_reset();
		return 1;
	}

	// Output command
	outb(DATA_FIFO, command);
	
	// Output arguments
	while (inlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 2;
		
		if ((inb(MSR) & 0x50) != 0x10)
			return 3;

		outb(DATA_FIFO, *inbytes);
		inbytes++;
		inlen--;
	}
	
	// Verify that we don't need to do some sort of execution phase
	if ((inb(MSR) & 0x20) == 0x20)
		PANIC("NDMA Set, no support for execution phase (yet).");
	
	// Wait for an irq if applicable
	if (wait_irq)
	{
		asm("cli");
		
		if (irq_waiting == 0)
			wait_timeout(wait_irq);
		
		if (idt_interruptable())
			asm("sti");
	}
	
	// Busy loop till IO is possible again
	for (i=0; i<2048 && (inb(MSR) & 0x80) != 0x80; i++);
	
	// Verify that we actually got into that state
	if ((inb(MSR) & 0x80) != 0x80)
		return inb(MSR);
	
	// Read any output bytes
	while (outlen > 0)
	{
		for (i=0; i<128 && (inb(MSR) & 0x80) != 0x80; i++);
		
		if ((inb(MSR) & 0x80) != 0x80)
			return 5;
		
		if ((inb(MSR) & 0x50) != 0x50)
			return 6;

		*outbytes = inb(DATA_FIFO);
		outbytes++;
		outlen--;
	}
	
	// Give it time to get out of command mode
	for (i=0; i<1024  && (inb(MSR) & 0x80) != 0x80; i++);
	
	while ((inb(MSR) & 0x10) == 0x10);
	
	// Verify state.
	if ((inb(MSR) & 0xD0) != 0x80)
		return 7;
	
	return 0;
}

void floppy_controller_init()
{
	unsigned char drive_types;
	
	// Find out which drives we have
	drive_types=cmos_read_byte(0x10);
	
	// Primary drive
	if ((drive_types & 0xF0) == 0x40)
		drive_avail[0] = 1;
	else
		drive_avail[0] = 0;
	
	// Secondary drive
	if ((drive_types & 0x0F) == 0x04)
		drive_avail[1] = 1;
	else
		drive_avail[1] = 0;
	
	// Reset controller
	controller_reset();
	
	// Set datarate
	outb(DSR, inb(DSR) & ~0x83);
	
	// Recalibrate available drives
	if (drive_avail[0] == 1)
		drive_recalibrate(0);
	if (drive_avail[1] == 1)
		drive_recalibrate(1);
}

void floppy_task()
{
	unsigned char floppy_io[8], floppy_o[7];
	unsigned char succes;
	
	// Register IRQ6 handler
	irq_set_handle(6, irq6);	
	
	// Initialize controller
	floppy_controller_init();
	
	print("Pong\n");
	
	// Request a dma buffer
	dmabuf = memory_allocate_buffer();
	((unsigned char*)dmabuf)[0] = 0;
	((unsigned char*)dmabuf)[1] = 0;
	
	
	// Try to read a sector
	print_hex(inb(0x08));
	print("\n");
	DMA_transfer(2, (unsigned int)dmabuf, 512, 1, 0);
	print_hex(inb(0x08));
	print("\n");
	floppy_io[0] = 0;
	floppy_io[1] = 0;
	floppy_io[2] = 0;
	floppy_io[3] = 1;
	floppy_io[4] = 2;
	floppy_io[5] = 1;
	floppy_io[6] = 0x1B;
	floppy_io[7] = 0xFF;
	while ((succes = controller_command(0x46, floppy_io, 8, floppy_o, 7, 60000)))
	{
		print_num(succes);
		print(" ");
		print_hex(inb(0x08));
		print(" fail\n");
		PANIC("");
		cont_motor_on(0);
	}
	
	print_hex(floppy_o[0]);
	print(" ");
	print_hex(floppy_o[1]);
	print(" ");
	print_hex(floppy_o[2]);
	print("\n");
	print_hex(floppy_o[3]);
	print(" ");
	print_hex(floppy_o[4]);
	print(" ");
	print_hex(floppy_o[5]);
	print("\n");
	print_hex(((unsigned char*)dmabuf)[0]);
	print(" ");
	print_hex(((unsigned char*)dmabuf)[1]);
	print("\n");
	
	while (1);
}

void floppy_init(int KPID)
{
	FTID = process_spawn_task(KPID, (unsigned int)floppy_task);
}
and the DMA driver:

Code: Select all

#include <util.h>
#include <idt.h>

#define MASK_PORT_03	0x0A
#define MASK_PORT_47	0xD4
#define FLIP_FLOP_03	0x0C
#define FLIP_FLOP_47	0xD8
#define MODE_PORT_03	0x0B
#define MODE_PORT_47	0x06

unsigned short BasePorts[8] =
{
	0x00, 0x02, 0x04, 0x06,
	0xC0, 0xC4, 0xC8, 0xCC
};

unsigned short CountPorts[8] =
{
	0x01, 0x03, 0x05, 0x07,
	0xC2, 0xC6, 0xCA, 0xCE
};

unsigned short PagePorts[8] =
{
	0x87, 0x83, 0x81, 0x82,
	0x8F, 0x8B, 0x89, 0x8A
};

void DMA_transfer(unsigned char channel, unsigned int base, unsigned int size,
					unsigned int mode, unsigned int iswrite)
{
	// Verify parameters
	if (channel > 7)
		PANIC("Illegal ISA-DMA channel.");
	if (base > 0x00FFFFFF)
		PANIC("Trying ISA-DMA from high memory.");
	if (size > 0x01000000)
		PANIC("Trying to transfer more than 64k.");
	if ((base & 0x00FF0000) != ((base + size) & 0x00FF0000))
		PANIC("Trying ISA-DMA accross 64k boundary.");
	if (mode > 2)
		PANIC("Unsupported ISA-DMA mode.");


	// CRITICAL SECTION
	asm("cli");
	
	// Mask the chosen channel
	if (channel < 4)
		outb(MASK_PORT_03, channel | 0x4);
	else
		outb(MASK_PORT_47, (channel-4) | 0x4);
	
	// Reset flipflop
	if (channel < 4)
		outb(FLIP_FLOP_03, 0xFF);
	else
		outb(FLIP_FLOP_47, 0xFF);
	
	// Load base address
	outb(BasePorts[channel], base & 0xFF);
	outb(BasePorts[channel], (base >> 8) & 0xFF);
	
	// Reset flipflop
	if (channel < 4)
		outb(FLIP_FLOP_03, 0xFF);
	else
		outb(FLIP_FLOP_47, 0xFF);
	
	// Load count
	size--;
	outb(CountPorts[channel], size & 0xFF);
	outb(CountPorts[channel], (size >> 8) & 0xFF);
	
	// Load page register
	outb(PagePorts[channel], (base >> 16) & 0xFF);
	
	// Load mode register
	if (channel < 4)
		outb(MODE_PORT_03, channel | (mode << 6) | (iswrite == 1 ? 4 : 8));
	else
		outb(MODE_PORT_47, (channel-4) | (mode << 6) | (iswrite == 1 ? 8 : 4));
	
	// Unmask
	if (channel < 4)
		outb(MASK_PORT_03, channel);
	else
		outb(MASK_PORT_47, channel-4);
	
	// END CRITICAL SECTION
	if (idt_interruptable())
		asm("sti");
}
Hopefully it will be more obvious to someone who has already done this stuff once.

Also it is unclear to me whether the example code on the dma page in the wiki is correct, if I interpret it according to the info given on the same page about the registers it masks the wrong channel during setup.

Edit: nvm the dma problem, I was wrong somewhere, in my handling of the differentiation between reads and writes to be exact.