Intel 8254x Receive Problems

Question about which tools to use, bugs, the best way to implement a function, etc should go here. Don't forget to see if your question is answered in the wiki first! When in doubt post here.
Post Reply
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Intel 8254x Receive Problems

Post by kc9zda »

I'm having some trouble receiving packets. I am trying to use the 82540EM network adapter on Virtualbox. The emulated adapter is bridged to the adapter on the host machine, so there are packets to be received. It seems to receive packets for a few seconds, but quits. There are also lots of dropped packets with 0 length, without an EOP flag set in the status, or status = 1. I adapted my driver partially from an example linked on the wiki.

I've only included the functions which I believe to not be functioning correctly. I know the interrupt processing is working, and I know the MMIO is working. I've tried setting the head and tail registers to different values and different ways of polling RX. With some values it seems to be signalling an interrupt when there's a packet, but the descriptors say otherwise. With others, it will recognize packets, but after about 20 to 30 seconds, it stops. Is the emulation of the chip just that bad, or is my code bad?

Here is the relevant code:

Code: Select all

void i8254x_init_card(int i) { // i is an index to an the array of adapters
	pci_register_irq_handler_idx(i8254x_cards[i].pci_idx,i8254x_irq_handler);
	pci_write_config_dword_idx(i8254x_cards[i].pci_idx,PCI_CONFIG_COMMAND,(pci_read_config_dword_idx(i8254x_cards[i].pci_idx,PCI_CONFIG_COMMAND) | (1<<2))); // enable pci busmastering, if it helps
	i8254x_mmio_write32(i,I8254X_REG_IMS,0x1f6dc); // set interrupt mask; should be 0x1ffff, 0x17fff or maybe 0x1f6dc?
	i8254x_init_mta(i);  // init multicast table
	i8254x_set_link_up(i); // set CTRL.SLU, CTRL.ASDE and clear CTRL.LRST
	i8254x_init_rx(i); // init receive
	i8254x_init_tx(i); // init transmit
	}

void i8254x_init_rx(int i) {
	u8 mac[6];

	i8254x_cards[i].rx_descs = pfa_alloc(I8254X_RXDESC_PGCOUNT); // allocate memory (in pages) for rx descriptors
	for (int j=0;j<(256*I8254X_RXDESC_PGCOUNT);j++) {  // iterate through each rx descriptor
		i8254x_cards[i].rx_descs[i].address = (u32)PG_V2P(pfa_alloc(2)); // allocate 8192 byte packet, and write physical address
		i8254x_cards[i].rx_descs[i].status = 0; // clear status so the controller knows it can be used
		}
	i8254x_read_mac(i, &mac); // read mac from eeprom
	i8254x_mmio_write32(i, I8254X_REG_RAL + (i*8), ((mac[3]<<24) | (mac[2]<<16) | (mac[1]<<8) | mac[0])); // set up MAC filter
	i8254x_mmio_write32(i, I8254X_REG_RAH + (i*8), (0x80000000 | (mac[5]<<8) | mac[4]));
	i8254x_mmio_write32(i, I8254X_REG_RDBAH, 0); // no 64 bit here
	i8254x_mmio_write32(i, I8254X_REG_RDBAL, (u32) PG_V2P(i8254x_cards[i].rx_descs)); // set pointer to descriptor array
	i8254x_mmio_write32(i, I8254X_REG_RDLEN, I8254X_RXDESC_COUNT * 16); //I8254X_RXDESC_PGCOUNT * I8254X_RXDESC_PER_PAGE * 16
	i8254x_mmio_write32(i, I8254X_REG_RDH, 0); // set head to 0, as the manual says
	i8254x_mmio_write32(i, I8254X_REG_RDT, I8254X_RXDESC_COUNT); // set tail to the index after the last descriptor, as the manual says
	i8254x_cards[i].rx_tail = 0; // set our tail to 0, because that works
	i8254x_mmio_write32(i, I8254X_REG_RCTL, (I8254X_RCTL_SBP | I8254X_RCTL_UPE | I8254X_RCTL_MPE | I8254X_RDMTS_HALF | I8254X_RCTL_SECRC | I8254X_RCTL_LPE | I8254X_RCTL_BAM | I8254X_RCTL_BSIZE_8192)); // set the receive control register (promisc, 8K pkt size)
	}

int i8254x_irq_handler(u8 irq) {
	int status = 0;

	for (int i=0; i<i8254x_count; i++) {
		if (i8254x_cards[i].pci_irq == irq) status = i8254x_irq_handler2(i);
		if (status != 0) break;
		}
	return status;
	}

int i8254x_irq_handler2(int idx) {
	u32 icr = i8254x_mmio_read32(idx,I8254X_REG_ICR); // read interrupt code
	u32 sicr = icr; // save icr for later

	if (!icr) return 0; // return 0 (irq not processed) if no interrupt flags set
	if (icr & I8254X_ICR_LSC) { // link status change
		printf("i8254x_irq_handler2: link status change\n");
		icr &= ~(I8254X_ICR_LSC); // clear bit
		i8254x_enable_rx(idx);
		}
	if ((icr & I8254X_ICR_RXT0) || (icr & I8254X_ICR_RXDMT0)) { // packet rx'd
		icr &= ~(I8254X_ICR_RXT0 | I8254X_ICR_RXDMT0); // clear bits
		i8254x_rx_poll(idx);
		}
	printf("i8254x_irq_handler2: rxfifo status: h=%u t=%u(%u)\n",i8254x_mmio_read32(idx,I8254X_REG_RDH),i8254x_mmio_read32(idx,I8254X_REG_RDT),i8254x_cards[idx].rx_tail);
	printf("i8254x_irq_handler2: %u (%u)\n",(unsigned int)icr,(unsigned int)sicr);
	i8254x_mmio_read32(idx,I8254X_REG_ICR); // clear interrupts; is this necessary?
	return 1; // tell caller the irq was handled
	}

void i8254x_enable_rx(int i) {
	i8254x_mmio_write32(i, I8254X_REG_RCTL, i8254x_mmio_read32(i, I8254X_REG_RCTL) | (I8254X_RCTL_EN));
	}

void i8254x_rx_poll(int idx) {
	printf("i8254x_rx_poll: status=%u\n",i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status);
	while ((i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status & I8254X_RXDSTS_DD)) { // while the tail packet has STATUS.DD set
		u16 pkt_len = i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].length; // packet length
		bool dropflag = FALSE;

		if (pkt_len < 60) { // small packet
			//printf("i8254x_rx_poll: small packet, %u bytes; dropping\n",pkt_len);
			printf("d");
			dropflag = TRUE;
			}
		if (!(i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status & I8254X_RXDSTS_EOP)) { // eop not set
			//printf("i8254x_rx_poll: eop not set? dropping\n");
			printf("D");
			dropflag = TRUE;
			}
		if (i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].errors) { // packet had errors
			printf("i8254x_rx_poll: packet had errors; dropping\n");
			dropflag = TRUE;
			}
		if (!dropflag) { // packet has no reason to be dropped
			printf("i8254x_rx_poll: good packet # %u, %u bytes\n",i8254x_cards[idx].rx_count,pkt_len);
			i8254x_cards[idx].rx_count++;
			}
		i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status = 0; // inform hardware descriptor is ready to use
		i8254x_cards[idx].rx_tail = (i8254x_cards[idx].rx_tail + 1) % I8254X_RXDESC_COUNT; // set tail to next descriptor, wrap if necessary
		i8254x_mmio_write32(idx, I8254X_REG_RDT, i8254x_cards[idx].rx_tail); // inform hardware of new tail
		i8254x_mmio_read32(idx, I8254X_REG_ICR); // clear interrupts?
		}
	}
User avatar
sleephacker
Member
Member
Posts: 97
Joined: Thu Aug 06, 2015 6:41 am
Location: Netherlands

Re: Intel 8254x Receive Problems

Post by sleephacker »

Is your receive descriptor buffer physically contiguous, as it should be?
The card also supports a feature called "null descriptor padding", where if you write a "null address" (I assume just 0) in the descriptor it will set the DD bit but leave the rest unchanged.

BTW, the tail pointer should also be initialized to 0 because the next index after the last one in a circular buffer is 0 again, and because "queue is empty" implies "tail == head" and vice versa.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

I wasn't sure about the buffers being physically contiguous, so I knocked the buffer size down to one page, where I can ensure a buffer is physically contiguous.

I did find that a lot of the receive descriptors had null buffer addresses, even though they should have been initialized.

I set the receive tail to its value from my interpretation of this recommendation in the development manual, section 14.4:
Head should point to the first valid receive descriptor in the descriptor ring and tail should point to one descriptor beyond the last valid descriptor in the descriptor ring
I found that there were lots of descriptors with null addresses. Here was part of the problem:

Code: Select all

i8254x_cards[i].rx_descs[i].address = (u32)PG_V2P(pfa_alloc(1)); // the index for rx_descs should have been j, not i.
I am still having a problem where it only receives for about 5 to 10 seconds and stops. It seems to stop receiving at a number of packets ending in f: like 0xf, 0x1f, 0x2f, 0x4f. When it stops, the tail pointer's lowest 4 bits are zero.
User avatar
sleephacker
Member
Member
Posts: 97
Joined: Thu Aug 06, 2015 6:41 am
Location: Netherlands

Re: Intel 8254x Receive Problems

Post by sleephacker »

sleephacker wrote:Is your receive descriptor buffer physically contiguous, as it should be?
Actually now that I think about it, everything that can be accessed by the card should be physically contiguous, because it doesn't have a clue about virtual memory.
kc9zda wrote:I set the receive tail to its value from my interpretation of this recommendation in the development manual, section 14.4:
Head should point to the first valid receive descriptor in the descriptor ring and tail should point to one descriptor beyond the last valid descriptor in the descriptor ring
Draw a circle, devide it in a number of descriptors. Draw an X at the first one, then keep following the circle clockwise until you are at the last descriptor you haven't visited yet, which should be just before the X. Now go one descriptor beyond that, and you're back at X. IOW, head and tail should be equal, and therefore zero. Another quote from the same manual, section 3.2.6:
Note that tail should still point to an area in the descriptor ring (somewhere between RDBA and RDBA + RDLEN). This is because tail points to the location where software writes the first new descriptor.
The manual is quite vague, but head and tail should be initialized to the same value, and tail shouldn't point to a non-existent descriptor.
kc9zda wrote:I am still having a problem where it only receives for about 5 to 10 seconds and stops. It seems to stop receiving at a number of packets ending in f: like 0xf, 0x1f, 0x2f, 0x4f. When it stops, the tail pointer's lowest 4 bits are zero.
How exactly does it stop? Do you stop receiving interrupts? What happens if you just scan the entire ringbuffer for nonzero status bytes?

Also, what are your constants set to? If you use one page for everything to ensure it's physically contiguous, then I8254X_RXDESC_PGCOUNT should be 1, I8254X_RXDESC_COUNT should be 256, and you should tell the card to use 4096 byte buffers instead of 8192 in your original code. Also, have you enabled BSEX in the RCTL register, which is necessary for buffer sizes of 4096 and more?

BTW, don't read the ICR at the end of your i8254x_rx_poll function, you already read it in your IRQ handler, so it'll either have no effect at all or cause you to miss interrupts.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

The 4k pages should be physically contiguous because of the way my page frame allocator works. The amount of pages requested is guaranteed to be virtually contiguous, but one cannot assume the pages are physically contiguous to each other. Each page allocated is guaranteed to be physically contiguous within itself.

I tried setting RDT to 0, which doesn't seem to send any receive interrupts at all. My understanding is the chip interprets "one past the last descriptor" as zero.

Sorry for not being to specific as to how it stops, it seems to stop sending interrupts. The IRQ handler code that calls the 8254x's irq handler sends an EOI to the PIC. If I understand this correctly, the 8254x's interrupt line is level triggered, so once the EOI goes out, if it is still high, it signals the interrupt again. It would still be high if there are more packets to be read, for example, if RXT0 or some other interrupt were high.

Here's some of the constants:

Code: Select all

#define PG_SIZE 4096

#define I8254X_RXDESC_PGCOUNT 1
#define I8254X_RXDESC_PER_PAGE (PG_SIZE / 16) // 256
#define I8254X_RXDESC_COUNT (I8254X_RXDESC_PGCOUNT * I8254X_RXDESC_PER_PAGE) // 256

#define I8254X_RCTL_BSIZE_4096 ((3 << 16) | (1 << 25))  // (1 << 25) = BSEX
#define I8254X_RCTL_BSIZE_8192 ((2 << 16) | (1 << 25))
I'm now only reading the ICR register at the beginning of the interrupt handler.

Attached is a screenshot of the output. There is no other output after this. The first line is printed when an interrupt is first received, it tells the values of the ISR and IRR registers of the PIC, and what it figures was the interrupt. The second line tells the status byte of the tail descriptor. The third line has the values of the head, tail on the chip, and tail in the driver. The fourth line prints the unhandled ICR flags and the original ICR flags. No other lines are printed after this.

Out of curiosity, I tried setting the RDT register to 16 times the rx_tail field. It received exactly 256 packets every time, so it stopped exactly when the tail goes back around. I am assuming this isn't a solution.
Attachments
os_screenshot1.png
os_screenshot1.png (2.15 KiB) Viewed 3402 times
User avatar
sleephacker
Member
Member
Posts: 97
Joined: Thu Aug 06, 2015 6:41 am
Location: Netherlands

Re: Intel 8254x Receive Problems

Post by sleephacker »

Are you sure that your constants for your registers are all correct?
I have an (admittedly far-fetched) theory that you're writing to RDLEN instead of RDT, which is only a small typo in the actual register offset. This would explain why it doesn't work when you set it to 0, why it works when you increment it by 16 (the length of a descriptor) until you set it to 0 again, and why it sort of worked in the past when you initialized it to 256, in which case the fact that virtual box caches the descriptors and won't look at RDLEN again until its caches run out explains why it worked for a few seconds. It's far fetched but it fits almost perfectly.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

A typo would be possible. I probably worked on at least half of this after midnight. Here's the constants

Code: Select all

#define I8254X_REG_RDBAL 0x2800
#define I8254X_REG_RDBAH 0x2804
#define I8254X_REG_RDLEN 0x2808
#define I8254X_REG_RDH 0x2810
#define I8254X_REG_RDT 0x2818
I double checked the manual and these look right. I double checked my MMIO code, which looks fine.

I took a look at the virtualbox source code for the chip, and if it is caching, it looks like the maximum descriptors that it caches would be 16. That's in the latest version though. My virtualbox is back at 4.3.10. I'm trying with 8 descriptors, since it is a multiple of 128 bytes, but doesn't have any descriptor indexes with 0 in all of the last four bits. With 8 descriptors, it receives many packets, but stops randomly now. It's like the interrupts aren't arriving.
User avatar
sleephacker
Member
Member
Posts: 97
Joined: Thu Aug 06, 2015 6:41 am
Location: Netherlands

Re: Intel 8254x Receive Problems

Post by sleephacker »

Yep, those constants are correct.

I'm kind of clueless, but I have a few ideas on how you could try to narrow down the cause:
  • The card has statistics registers that count the number of packets received, dropped/missed, etc. See section 13.7 of the manual. I suggest periodically polling a few relevant statistics registers and saving them, then print them out every time they change.
  • After it stops sending interrupts, try:
    • Periodically polling the ICR to see if the device just didn't send interrupts or if they didn't arrive.
    • Setting any valid bit in the ICS (Interrupt Cause Set) register, which should generate the interrupt corresponding to that bit if that bit is also set in the interrupt mask.
    • Periodically scanning the receive descriptor ring for nonzero status bytes, to see if the card is still doing its job but just not generating interrupts.
    • Resetting / reinitializing the card, to see if anything is permanently broken.
  • Try running a linux with the same virtualbox setup (at least for networking), to see if it's perhaps a bug with virtualbox or some other part of your setup.
  • If possible try testing on real hardware as well.
  • If all else fails, maybe try updating virtualbox, but I highly doubt if that's going to fix it.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

I tried looking at the missed packet count statistical counter, and it didn't change. I didn't try the received packet counter, so I might try that.

I tried running ArchLinux on a vm with the same chip, and bridged to the host's NIC. Worked fine.

I tried resetting the card during initialization, no change.

I haven't yet tried polling the ICR, changing the ICS, or scanning the ring. I don't think I have one of those cards lying around anywhere. I'll try those things, and if it doesn't work, I'll probably put networking on the back burner. I've got to work on other things, like the file system. Thank you so much for all of your help.
linuxyne
Member
Member
Posts: 211
Joined: Sat Jul 02, 2016 7:02 am

Re: Intel 8254x Receive Problems

Post by linuxyne »

There seems to be an error in the handling of the hw and sw tails.

RDT is the index of the descriptor which the hw is not allowed to touch - i.e. it is the descriptor beyond the 'last' descriptor in the chain the hw can process, where last does not necessarily mean the descriptor at the index I8254X_RXDESC_COUNT - 1.

The initialization given is:

Code: Select all

RDH = 0;
RDT = I8254X_RXDESC_COUNT;
rx_tail = 0;
If a single descriptor is used upon the arrival of a packet, the RDH moves to 1.
The function i8254x_rx_poll then sets the RDT = rx_tail + 1 = 1 = RDH, effectively filling the entire buffer.

The function is prone to setting RDT the same as RDH. The card remains operational as long as RDH remains ahead of RDT, but at some point in time, due to lack of packets, the function will process a rx_tail just before RDH, increment rx_tail and save it as RDT.

We can instead initialize as below:

Code: Select all

RDH = 0;
rx_tail = RDT = I8254X_RXDESC_COUNT - 1;
Within i8254x_rx_poll, increment rx_tail (modulo the count) first, then process that descriptor, restore it, and save the rx_tail into RDT.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

linuxyne: It appears your solution works quite well. That was a nice explanation. Thanks!

Here is my rx_poll code:

Code: Select all

void i8254x_rx_poll(int idx) {	
	while (i8254x_cards[idx].rx_descs[I8254X_INC_RDT(i8254x_cards[idx].rx_tail)].status & I8254X_RXDSTS_DD) { // if dd set in next descriptor from rx_tail
		i8254x_cards[idx].rx_tail = (i8254x_cards[idx].rx_tail + 1) % I8254X_RXDESC_COUNT;

		u16 pkt_len = i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].length; // packet length
		bool dropflag = FALSE;

		if (pkt_len < 60) { // small packet
			printf("d");
			dropflag = TRUE;
			}
		if (!(i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status & I8254X_RXDSTS_EOP)) { // eop not set
			printf("D");
			dropflag = TRUE;
			}
		if (i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].errors) { // packet had errors
			printf("i8254x_rx_poll: packet had errors; dropping\n");
			dropflag = TRUE;
			}
		if (!dropflag) { // packet has no reason to be dropped
			printf("i8254x_rx_poll: good packet # %u, %u bytes\n",i8254x_cards[idx].rx_count,pkt_len);
			i8254x_cards[idx].rx_count++;
			}
		i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].status = 0;
		i8254x_cards[idx].rx_descs[i8254x_cards[idx].rx_tail].errors = 0;
		i8254x_mmio_write32(idx, I8254X_REG_RDT, i8254x_cards[idx].rx_tail);
		}
	}
linuxyne
Member
Member
Posts: 211
Joined: Sat Jul 02, 2016 7:02 am

Re: Intel 8254x Receive Problems

Post by linuxyne »

The while condition at the top, which checks the status.DD bit, should not satisfy for rx_tail, since the descriptor pointed to by rx_tail is kept clear.

I am not sure if the function would work always, unless I am missing something in the code.

The suggestion in my previous post was too short. Below is what I meant:

Code: Select all

// upon enter, RDT == rx_tail.
tail = (rx_tail + 1) mod count;
while (desc[tail].status.DD) {
   // work on desc[tail].
   // clear desc[tail].

   // advance the RDT to release the previous
   // desc for use by hw. 
   rx_tail = RDT = tail;
   
   // work on the next desc.
   tail = (tail + 1) mod count;
}
Edit: Correction in the comment about the release of the desc.
kc9zda
Posts: 9
Joined: Sat Jul 20, 2013 10:06 pm

Re: Intel 8254x Receive Problems

Post by kc9zda »

I fixed it and it still seems to work. Thanks
Post Reply