On my side, I fixed some VMX-related bugs back in September (and committed the fixes to the /ispras branch in October), especially bugs related to UG (unrestricted guest) mode.
Our team leader sent the patches to your team but said that no reply ever came.
Do you know anything about that and, if so, are you going to use our patches?
Also, I'm now trying to run Palacios on an HP ProLiant server (ProLiant BL2x220c G7), but linux-2.6.32-71.29.1.el6.x86_64 (CentOS 6.0) fails.
) to run under Palacios (some unrelated parts were changed because the compiler didn't like those lines):
Code: Select all
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 81fb1b0..7b73747 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -232,6 +232,10 @@ source "init/Kconfig"
source "kernel/Kconfig.freezer"
menu "Processor type and features"
+config PALACIOS
+ bool "Palacios support"
+ help
+ No help.
source "kernel/time/Kconfig"
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 631958a..42a0969 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
+obj-$(CONFIG_PALACIOS) += palacios.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5acdbc7..f46b207 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1303,7 +1303,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
-END(do_hypervisor_callback)
+END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/palacios.c b/arch/x86/kernel/palacios.c
new file mode 100644
index 0000000..b4f5cbf
--- /dev/null
+++ b/arch/x86/kernel/palacios.c
@@ -0,0 +1,132 @@
+/*
+ * palacios.c
+ *
+ * Created on: Jul 12, 2011
+ * Author: vedun
+ */
+
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <asm/msr.h>
+#include <palacios/palacios.h>
+#include <asm/io.h>
+
+#define NO_SYM_HV 0
+#define SYM_HV_VMX 1
+#define SYM_HV_SVM 2
+#define SYM_PAGE_SIZE 12
+#define SYM_CPUID_NUM 0x90000000
+#define MEM_OFFSET_HCALL 0x1000
+
+#define SYM_MSR_GLOBAL 0x00000534
+
+static struct v3_symspy_global_page {
+ uint64_t magic;
+
+ union {
+ uint32_t feature_flags;
+ struct {
+ uint8_t pci_map_valid : 1;
+ uint8_t symmod_enabled : 1;
+ uint8_t sec_symmod_enabled : 1;
+ } __attribute__((packed));
+ } __attribute__((packed));
+
+ uint8_t pci_pt_map[(4 * 256) / 8]; // we're hardcoding this: (4 busses, 256 max devs)
+
+} __attribute__((packed)) *symspy_global_page;
+
+
+static int symspy_is_initialized = 0;
+static int vm_is_detected = 0;
+static unsigned long long mem_offset = 0;
+
+static int detect_sym_hv(void) {
+ unsigned int eax = 0, ebx = 0;
+ printk("Detecting symbiotic hypervisor..\n");
+
+ asm volatile(
+ "cpuid;" :"=a"(eax),"=b"(ebx):"a"((unsigned int)SYM_CPUID_NUM)
+ );
+
+ if(eax == *(unsigned int*)"V3V") {
+ printk("V3VEE detected: arch %s.\n", (char*)&ebx);
+
+ if(ebx == *(unsigned int*)"SVM")
+ return SYM_HV_SVM;
+ else if(ebx == *(unsigned int*)"VMX")
+ return SYM_HV_VMX;
+ else {
+ printk("Bad signature!\n");
+ return NO_SYM_HV;
+ }
+ }
+
+ printk("V3VEE not detected. EAX %x EBX %x\n", eax, ebx);
+
+ return NO_SYM_HV;
+}
+
+static int symbiotic_test(void) {
+ int detect = 0;
+ void* vaddr;
+ dma_addr_t paddr;
+
+ printk("SYMBIOTIC TEST START\n");
+ if((detect = detect_sym_hv()) != NO_SYM_HV) {
+ int status = 0;
+ if(detect == SYM_HV_SVM) {
+ asm volatile(
+ "vmmcall;"
+ :"=a"(status), "=b"(mem_offset):"a"(MEM_OFFSET_HCALL)
+ );
+ } else {
+ asm volatile(
+ "vmcall;"
+ :"=a"(status), "=b"(mem_offset):"a"(MEM_OFFSET_HCALL)
+ );
+ }
+ if(status != 0) {
+ printk("Hypercall finished with error.\n");
+ } else {
+ printk("Detected memory offset %llx.\n", mem_offset);
+ }
+
+ vaddr = (void *) __get_free_page(GFP_KERNEL);
+ paddr = virt_to_phys(vaddr);
+ //unsigned long long value = paddr;
+ wrmsr(SYM_MSR_GLOBAL, paddr & 0xFFFFFFFF, paddr >> 32);
+ symspy_global_page = vaddr;
+
+ printk("SymspyGlobalPage detected at VA %LX, PA %LX\n", (long long)vaddr, (long long)paddr);
+
+ return 1;
+ }
+ return 0;
+}
+
+
+uint64_t palacios_get_device_dma_offset(int bus, int dev, int func) {
+ if (bus >= 4)
+ return 0;
+
+ if (!symspy_is_initialized) {
+ vm_is_detected = detect_sym_hv();
+ symbiotic_test();
+ symspy_is_initialized = 1;
+ }
+
+ if (vm_is_detected) {
+ int dev_index = (bus << 8) + (dev << 3) + func;
+ int major = dev_index / 8;
+ int minor = dev_index % 8;
+ return ((symspy_global_page->pci_pt_map[major] & (1 << minor)) == 0) ? 0 : mem_offset;
+ } else {
+ return 0;
+ }
+}
+
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 6ac3931..efbc143 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -169,7 +169,10 @@ again:
return NULL;
}
- *dma_addr = addr;
+ *dma_addr = addr + dev->dma_offset;
+ if (dev->dma_offset != 0) {
+ printk("Alloc %lX %lX : %lX\n", (long unsigned)*dma_addr , (long unsigned)(*dma_addr - dev->dma_offset), (long unsigned)page);
+ }
return page_address(page);
}
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a3933d4..b738fef 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -30,7 +30,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- dma_addr_t bus = page_to_phys(page) + offset;
+ dma_addr_t bus = page_to_phys(page) + offset + dev->dma_offset;
+ printk("map_page: %lX %lX : %lX\n", (long unsigned)bus, (long unsigned)(bus - dev->dma_offset), (long unsigned)page);
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return bad_dma_address;
@@ -64,7 +65,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
for_each_sg(sg, s, nents, i) {
BUG_ON(!sg_page(s));
- s->dma_address = sg_phys(s);
+ s->dma_address = sg_phys(s) + hwdev->dma_offset;
if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
return 0;
s->dma_length = s->length;
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index c34dca8..08e5e33 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -1491,7 +1491,7 @@ static void uv_init_per_cpu(int nuvhubs)
int uvhub;
short socket = 0;
unsigned short socket_mask;
- unsigned int uvhub_mask;
+ unsigned int uvhub_mask = 0;
struct bau_control *bcp;
struct uvhub_desc *bdp;
struct socket_desc *sdp;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 35236aa..9b63cba 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -3225,7 +3225,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
VPRINTK("ENTER\n");
- WARN_ON(ATA_MAX_QUEUE > AHCI_MAX_CMDS);
+ //WARN_ON(ATA_MAX_QUEUE > AHCI_MAX_CMDS);
if (!printed_version++)
dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n");
diff --git a/drivers/base/core.c b/drivers/base/core.c
index fab9f76..f4c9e60 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -557,6 +557,7 @@ static void klist_children_put(struct klist_node *n)
*/
void device_initialize(struct device *dev)
{
+ dev->dma_offset = 0;
dev->kobj.kset = devices_kset;
kobject_init(&dev->kobj, &device_ktype);
INIT_LIST_HEAD(&dev->dma_pools);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 1eef267..a6044ed 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -473,7 +473,7 @@ static ssize_t dev_show_unique_id(struct device *dev,
{
drive_info_struct *drv = to_drv(dev);
struct ctlr_info *h = to_hba(drv->dev.parent);
- __u8 sn[16];
+ __u8 sn[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
unsigned long flags;
int ret = 0;
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index cef28a7..b2046e4 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -17,6 +17,10 @@
#include "pci.h"
+#ifdef CONFIG_PALACIOS
+#include <palacios/palacios.h>
+#endif
+
/**
* pci_bus_alloc_resource - allocate a resource from a parent bus
* @bus: PCI bus
@@ -92,6 +96,13 @@ int pci_bus_add_device(struct pci_dev *dev)
dev->is_added = 1;
pci_proc_attach_device(dev);
pci_create_sysfs_dev_files(dev);
+
+#ifdef CONFIG_PALACIOS
+ dev->dev.dma_offset = palacios_get_device_dma_offset
+ (dev->bus->number, dev->devfn >> 3, dev->devfn & 0x7);
+ printk("Palacios for %d:%d : DMA Offset is %lX\n", dev->bus->number, dev->devfn, (unsigned long)dev->dev.dma_offset);
+#endif
+
return 0;
}
diff --git a/fs/compat.c b/fs/compat.c
index dc7853a..c8fb2f3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -15,6 +15,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/compat.h>
@@ -817,8 +818,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
return retval;
}
-#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
-
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_offset;
@@ -907,7 +906,7 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
struct compat_linux_dirent __user * dirent;
struct compat_getdents_callback *buf = __buf;
compat_ulong_t d_ino;
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t));
+ int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
@@ -994,7 +993,7 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t
{
struct linux_dirent64 __user *dirent;
struct compat_getdents_callback64 *buf = __buf;
- int jj = NAME_OFFSET(dirent);
+ int jj = offsetof(struct compat_linux_dirent, d_name);
int reclen = ALIGN(jj + namlen + 1, sizeof(u64));
u64 off;
diff --git a/fs/readdir.c b/fs/readdir.c
index 7723401..ab07ead 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -4,6 +4,7 @@
* Copyright (C) 1995 Linus Torvalds
*/
+#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/time.h>
@@ -54,7 +55,6 @@ EXPORT_SYMBOL(vfs_readdir);
* anyway. Thus the special "fillonedir()" function for that
* case (the low-level handlers don't need to care about this).
*/
-#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
#ifdef __ARCH_WANT_OLD_READDIR
@@ -152,7 +152,7 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
struct linux_dirent __user * dirent;
struct getdents_callback * buf = (struct getdents_callback *) __buf;
unsigned long d_ino;
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long));
+ int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
@@ -237,7 +237,7 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
{
struct linux_dirent64 __user *dirent;
struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64));
+ int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 1, sizeof(u64));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
diff --git a/include/Kbuild b/include/Kbuild
index 8d226bf..f1da0d9 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -10,3 +10,4 @@ header-y += video/
header-y += drm/
header-y += xen/
header-y += scsi/
+header-y += palacios/
diff --git a/include/linux/device.h b/include/linux/device.h
index 2ea3e49..ad6273d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -407,6 +407,7 @@ struct device {
allocations such descriptors. */
struct device_dma_parameters *dma_parms;
+ u64 dma_offset; /* Palacios dma offset */
struct list_head dma_pools; /* dma pools (if dma'ble) */
diff --git a/include/palacios/Kbuild b/include/palacios/Kbuild
new file mode 100644
index 0000000..4bed9d9
--- /dev/null
+++ b/include/palacios/Kbuild
@@ -0,0 +1 @@
+header-y += palacios.h
\ No newline at end of file
diff --git a/include/palacios/palacios.h b/include/palacios/palacios.h
new file mode 100644
index 0000000..b7077a1
--- /dev/null
+++ b/include/palacios/palacios.h
@@ -0,0 +1,13 @@
+/*
+ * palacios.h
+ *
+ * Created on: Jul 12, 2011
+ * Author: vedun
+ */
+
+#ifndef __PALACIOS_H
+#define __PALACIOS_H
+
+uint64_t palacios_get_device_dma_offset(int bus, int dev, int func);
+
+#endif /* __PALACIOS_H */
diff --git a/init/main.c b/init/main.c
index 1a9af60..8c8bc23 100644
--- a/init/main.c
+++ b/init/main.c
@@ -744,11 +744,10 @@ int do_one_initcall(initcall_t fn)
calltime = ktime_get();
trace_boot_call(&call, fn);
enable_boot_trace();
- }
+
- ret.result = fn();
+ ret.result = fn();
- if (initcall_debug) {
disable_boot_trace();
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
@@ -756,6 +755,8 @@ int do_one_initcall(initcall_t fn)
trace_boot_ret(&ret, fn);
printk("initcall %pF returned %d after %Ld usecs\n", fn,
ret.result, ret.duration);
+ } else {
+ ret.result = fn();
}
msgbuf[0] = 0;
diff --git a/kernel/async.c b/kernel/async.c
index 27235f5..393e033 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -284,17 +284,17 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("async_waiting @ %i\n", task_pid_nr(current));
starttime = ktime_get();
- }
- wait_event(async_done, lowest_in_progress(running) >= cookie);
+ wait_event(async_done, lowest_in_progress(running) >= cookie);
- if (initcall_debug && system_state == SYSTEM_BOOTING) {
endtime = ktime_get();
delta = ktime_sub(endtime, starttime);
printk("async_continuing @ %i after %lli usec\n",
task_pid_nr(current),
(long long)ktime_to_ns(delta) >> 10);
+ } else {
+ wait_event(async_done, lowest_in_progress(running) >= cookie);
}
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
diff --git a/lib/iomap.c b/lib/iomap.c
index d322293..bd32c25 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -258,20 +258,27 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
resource_size_t start = pci_resource_start(dev, bar);
resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
+ void __iomem *ret = NULL;
if (!len || !start)
return NULL;
if (maxlen && len > maxlen)
len = maxlen;
- if (flags & IORESOURCE_IO)
- return ioport_map(start, len);
+ if (flags & IORESOURCE_IO) {
+ ret = ioport_map(start, len);
+ goto end;
+ }
if (flags & IORESOURCE_MEM) {
- if (flags & IORESOURCE_CACHEABLE)
- return ioremap(start, len);
- return ioremap_nocache(start, len);
+ if (flags & IORESOURCE_CACHEABLE) {
+ ret = ioremap(start, len);
+ goto end;
+ }
+ ret = ioremap_nocache(start, len);
}
+end:
+ printk("DEBUG : Mapping %lX..%lX to %lX\n", (unsigned long)start, (unsigned long)len, (unsigned long)ret);
/* What? */
- return NULL;
+ return ret;
}
void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
As soon as I find more information, I'll update this post.