Page 1 of 1

AHCI driver not working in real hardware

Posted: Sun Sep 12, 2021 4:28 pm
by Bonfra
So I followed the wiki to create an AHCI driver. I enumerated the PCI bus for each combination of (bus, device, function):

Code: Select all

// Walk every (bus, device, function) address and snapshot the configuration
// space of each function that responds.
for(int bus = 0; bus < PCI_MAX_BUS; bus++)
        for(int dev = 0; dev < PCI_MAX_DEVICE; dev++)
            for(int func = 0; func < PCI_MAX_FUNCTION; func++)
                // Offset 0 is the vendor ID; an absent function reads back as all ones
                if(pci_read_word(bus, dev, func, 0) != UINT16_MAX)
                {
                    volatile pci_device_t data;
                    // Keep the volatile qualifier on the alias: storing into a
                    // volatile object through a non-volatile lvalue is undefined
                    // behaviour, and the compiler may drop or reorder the stores.
                    volatile uint32_t* dataptr = (volatile uint32_t*)&data;

                    // read in the 256 bytes (64 dwords)
                    for(int i = 0; i < 64; i++)
                        dataptr[i] = pci_read_dword(bus, dev, func, i << 2);
                
                    // ...
                }
I switched on the class of the device and registered the device:

Code: Select all

                    // Dispatch on the PCI class code of the function just read.
                    switch (data.class)
                    {
                        case PCI_CLASS_ATA:
                        {
                            // PCI command register (offset 0x4):
                            //   bit 1 = memory space enable (needed for MMIO access to the BARs)
                            //   bit 2 = bus master enable   (needed for the controller's DMA)
                            // Firmware usually sets bit 1 already, but that is not
                            // guaranteed, so enable both explicitly.
                            uint16_t command_r = pci_read_word(bus, dev, func, 0x4);
                            pci_write_word(bus, dev, func, 0x4, command_r | (1 << 1) | (1 << 2));
                            ata_register_device(&data);
                            break;
                        }

                        default:
                            // Other device classes are not handled here
                            break;
                    }
Registering the device means:

Code: Select all

volatile hba_mem_t* hba_mem = (volatile hba_mem_t*)(uint64_t)device->base5;

    for(int bit = 0; bit < 32; bit++)
        if(hba_mem->pi & (1 << bit)) // bit is set: device exists
        {
            volatile hba_port_t* port = &(hba_mem->ports[bit]);
            
            switch (check_type(port))
            {
            case AHCI_DEV_SATA:
                {
                    hba_device_t hba_device;
                    hba_device.port = port;

                    devices[registered_devices] = hba_device;
                    configure_device(&(devices[registered_devices]));
                    registered_devices++;
                }
                break;
            }
        }
And finally that configure function does this:

Code: Select all

/*
 * Prepare one port for issuing commands.
 *
 * Three pages are allocated:
 *   - the first holds the command list (first 1024 bytes) followed by the
 *     received-FIS area (256 bytes at offset 1024);
 *   - the other two hold the 32 command tables, 256 bytes each, 16 per page.
 *
 * The port is stopped while its base registers are programmed, restarted
 * afterwards, and finally ahci_identify() is run on the device.
 */
static void configure_device(hba_device_t* device)
{
    enum
    {
        CMD_LIST_BYTES  = 1024,
        FIS_AREA_BYTES  = 256,
        CMD_TABLE_BYTES = 256,
        CMD_SLOTS       = 32,
        TABLES_PER_PAGE = 16
    };

    // Allocate in a fixed order: list page first, then the two table pages
    uint64_t list_page = (uint64_t)pfa_alloc_page();
    uint64_t table_pages[2];
    table_pages[0] = (uint64_t)pfa_alloc_page();
    table_pages[1] = (uint64_t)pfa_alloc_page();

    uint64_t list_base = list_page;
    uint64_t fis_base = list_page + CMD_LIST_BYTES;

    stop_cmd(device->port);

    // Command list base, split into low/high 32-bit halves
    device->port->clb = (uint32_t)list_base;
    device->port->clbu = (uint32_t)(list_base >> 32);
    memset((void*)list_base, 0, CMD_LIST_BYTES);

    // Received-FIS base, split the same way
    device->port->fb = (uint32_t)fis_base;
    device->port->fbu = (uint32_t)(fis_base >> 32);
    memset((void*)fis_base, 0, FIS_AREA_BYTES);

    // Point every command header at its own zeroed command table
    device->cmd_header = (hba_cmd_header_t*)list_base;
    for (int slot = 0; slot < CMD_SLOTS; slot++)
    {
        device->cmd_header[slot].prdtl = 1;

        uint64_t table = table_pages[slot / TABLES_PER_PAGE]
                       + CMD_TABLE_BYTES * (slot % TABLES_PER_PAGE);

        device->cmd_header[slot].ctba = (uint32_t)table;
        device->cmd_header[slot].ctbau = (uint32_t)(table >> 32);
        memset((void*)table, 0, CMD_TABLE_BYTES);
    }

    start_cmd(device->port);

    ahci_identify(device);
}
So now, following the wiki, i should be able to read data from the disk with this function:

Code: Select all

bool sata_read(size_t device, uint64_t lba, uint8_t count, volatile void* address)
{
    if(device >= registered_devices)
        return false;

    hba_device_t* hba_device = &(devices[device]);

    hba_device->port->is = -1; // Clear pending interrupt bits
    int slot = find_cmdslot(hba_device->port);

    if(slot == -1)
        return false;

    volatile hba_cmd_header_t* cmd = (volatile hba_cmd_header_t*)hba_device->port->clb;
    cmd += slot;
    cmd->cfl = sizeof(fis_reg_h2d_t) / sizeof(uint32_t);
    cmd->w = 0;
    cmd->prdtl = 1;

    volatile hba_cmd_table_t* cmdtable = (volatile hba_cmd_table_t*)(((uint64_t)cmd->ctbau >> 32) | (uint64_t)cmd->ctba);
    memset((void*)cmdtable, 0, sizeof(hba_cmd_table_t) + (cmd->prdtl - 1) * sizeof(hba_prdt_entry_t));

    cmdtable->prdt_entry[0].dba = (uint32_t)(uint64_t)address;
    cmdtable->prdt_entry[0].dbau = ((uint64_t)address >> 32);
    cmdtable->prdt_entry[0].dbc = ((uint64_t)count) * 512 - 1;
    cmdtable->prdt_entry[0].i = 1;

    volatile fis_reg_h2d_t* cmdfis = (volatile fis_reg_h2d_t*)(cmdtable->cfis);
    memset((void*)cmdfis, 0, sizeof(fis_reg_h2d_t));
    cmdfis->fis_type = FIS_TYPE_REG_H2D;
    cmdfis->command = ATA_CMD_READ_DMA_EX;
    cmdfis->device = 1 << 6;    // LBA mode
    cmdfis->c = 1;  // Command

    uint32_t lba_low = lba & 0xFFFFFFFFLL;
    uint32_t lba_high = (lba >> 32) & 0xFFFFFFFFLL;

    cmdfis->lba0 = (uint8_t)lba_low;
    cmdfis->lba1 = (uint8_t)(lba_low >> 8);
    cmdfis->lba2 = (uint8_t)(lba_low >> 16);
    cmdfis->lba3 = (uint8_t)(lba_low >> 24);
    cmdfis->lba4 = (uint8_t)lba_high;
    cmdfis->lba5 = (uint8_t)(lba_high >> 8);
 
    cmdfis->countl = count & 0xFF;
    cmdfis->counth = (count >> 8) & 0xFF;

    int spin = 0; // Spin lock timeout counter
    while ((hba_device->port->tfd & (ATA_DEV_BUSY | ATA_DEV_DRQ)) && spin < 1000000)
        spin++;

    if (spin == 1000000)
        return false;

    hba_device->port->ci = 1 << slot;

    // Wait for completion
    while (1)
    {
        // In some longer duration reads, it may be helpful to spin on the DPS bit 
        // in the PxIS port field as well (1 << 5)
        if ((hba_device->port->ci & (1 << slot)) == 0) 
            break;
        if (hba_device->port->is & HBA_PxIS_TFES)   // Task file error
            return false;
    }

    // Check again
    if (hba_device->port->is & HBA_PxIS_TFES)
        return false;

    return true;
}
However, it works in QEMU but returns all zeros on real hardware. I also tested it with VMware, and it seems to fail in the final "wait for completion" loop, so the function returns before the read has finished; in fact, if I place a small delay after each read, some of the data is correct.
What am I doing wrong?