--- ./kernel/Kbuild +++ ./kernel/Kbuild @@ -72,6 +72,7 @@ EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno- EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -DNV_VERSION_STRING=\"450.156\" -Wno-unused-function -Wuninitialized -fno-strict-aliasing -mno-red-zone -mcmodel=kernel -DNV_UVM_ENABLE EXTRA_CFLAGS += $(call cc-option,-Werror=undef,) EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2) +EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER -Wfatal-errors # # Detect SGI UV systems and apply system-specific optimizations. --- ./kernel/conftest.sh +++ ./kernel/conftest.sh @@ -4576,7 +4576,7 @@ case "$5" in # VERBOSE=$6 iommu=CONFIG_VFIO_IOMMU_TYPE1 - mdev=CONFIG_VFIO_MDEV_DEVICE + mdev=CONFIG_VFIO_MDEV kvm=CONFIG_KVM_VFIO VFIO_IOMMU_PRESENT=0 VFIO_MDEV_DEVICE_PRESENT=0 --- ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.c +++ ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.c @@ -24,6 +24,10 @@ #include #include #include +#include +#include +#include +#include #include "nvstatus.h" #include "nv-misc.h" #include "nv-linux.h" @@ -37,6 +41,25 @@ struct vgpu_devs vgpu_devices; struct phys_devs phys_devices; +struct mdev_parent { + struct device *dev; + const struct mdev_parent_ops *ops; + struct kref ref; + struct list_head next; + struct kset *mdev_types_kset; + struct list_head type_list; + /* Synchronize device creation/removal with parent unregistration */ + struct rw_semaphore unreg_sem; +}; + +struct mdev_type { + struct kobject kobj; + struct kobject *devices_kobj; + struct mdev_parent *parent; + struct list_head next; + unsigned int type_group_id; +}; + #define SLEEP_TIME_MILLISECONDS 20 #define VGPU_EXIT_TIMEOUT_MILLISECONDS 5000 #define WAITQUEUE_TIMEOUT_SECONDS 25000 @@ -162,8 +185,8 @@ struct parent_ops vgpu_fops = { .remove = nv_vgpu_vfio_destroy, .read = nv_vgpu_vfio_read, .write = nv_vgpu_vfio_write, - .open = nv_vgpu_vfio_open, - .release = nv_vgpu_vfio_close, + .open_device = nv_vgpu_vfio_open, + .close_device = nv_vgpu_vfio_close, .ioctl = nv_vgpu_vfio_ioctl, .mmap = nv_vgpu_vfio_mmap, }; @@ -368,9 +391,9 @@ static NV_STATUS nv_get_vgpu_type_id(const char *kobj_name, struct device *dev, return NV_OK; } -static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) +static ssize_t name_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf) { - struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *pdev = to_pci_dev(mtype->parent->dev); struct pci_dev *parent_device; NvU32 vgpu_type_id; NV_STATUS status; @@ -381,7 +404,7 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) parent_device = pdev; - if (nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id) + if (nv_get_vgpu_type_id(mtype->kobj.name, mtype->parent->dev, &vgpu_type_id) == NV_OK) status = rm_vgpu_vfio_ops.get_name(parent_device, vgpu_type_id, buf); else @@ -394,9 +417,9 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf) } MDEV_TYPE_ATTR_RO(name); -static ssize_t description_show(struct kobject *kobj, struct device *dev, char *buf) +static ssize_t description_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf) { - struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *pdev = to_pci_dev(mtype->parent->dev); struct pci_dev *parent_device; NvU32 vgpu_type_id; NV_STATUS status; @@ -407,7 +430,7 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, char * parent_device = pdev; - if (nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id) + if (nv_get_vgpu_type_id(mtype->kobj.name, mtype->parent->dev, &vgpu_type_id) == NV_OK) status = rm_vgpu_vfio_ops.get_description(parent_device, vgpu_type_id, buf); else @@ -420,13 +443,13 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, char * } MDEV_TYPE_ATTR_RO(description); -static ssize_t available_instances_show(struct kobject *kobj, struct device *dev, char *buf) +static ssize_t available_instances_show(struct mdev_type *t, struct mdev_type_attribute *ta, char *buf) { - struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *pdev = to_pci_dev(t->parent->dev); NvU32 vgpu_type_id; NV_STATUS status; - if ((nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id)) == NV_OK) + if ((nv_get_vgpu_type_id(t->kobj.name, t->parent->dev, &vgpu_type_id)) == NV_OK) status = rm_vgpu_vfio_ops.get_instances(pdev, vgpu_type_id, buf); else return -EINVAL; @@ -438,8 +461,7 @@ static ssize_t available_instances_show(struct kobject *kobj, struct device *dev } MDEV_TYPE_ATTR_RO(available_instances); -static ssize_t device_api_show(struct kobject *kobj, struct device *dev, - char *buf) +static ssize_t device_api_show(struct mdev_type *t, struct mdev_type_attribute *ta, char *buf) { return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); @@ -534,7 +556,7 @@ destroy_exit: return ret; } -static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev) +static int nv_vgpu_vfio_create(struct mdev_device *mdev) { NV_STATUS status = NV_OK; vgpu_dev_t *vgpu_dev = NULL; @@ -556,7 +578,7 @@ static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev) if (!pdev) return -EINVAL; - if (nv_get_vgpu_type_id(kobj->name, NV_GET_MDEV_PARENT(mdev), &vgpu_type_id) + if (nv_get_vgpu_type_id(mdev->type->kobj.name, NV_GET_MDEV_PARENT(mdev), &vgpu_type_id) != NV_OK) { ret = -EINVAL; @@ -631,12 +653,7 @@ static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev) if (pdev->is_virtfn) { #if defined(NV_MDEV_SET_IOMMU_DEVICE_PRESENT) - ret = mdev_set_iommu_device(NV_GET_MDEV_DEV(mdev), NV_GET_MDEV_PARENT(mdev)); - if (ret != 0) - { - NV_VGPU_DEV_LOG(VGPU_ERR, mdev, "Failed to set IOMMU device. ret: %d \n", ret); - goto remove_vgpu; - } + mdev_set_iommu_device(mdev, NV_GET_MDEV_PARENT(mdev)); #endif } @@ -2479,19 +2496,18 @@ invalidate_exit: static int vgpu_save_fd(vgpu_dev_t *vgpu_dev, int fd, NvU32 index) { - struct fd irqfd; + struct eventfd_ctx *evt; - irqfd = fdget(fd); - if (!irqfd.file) - return -EBADF; + evt = eventfd_ctx_fdget(fd); + if (IS_ERR(evt)) + return PTR_ERR(evt); if (index == VFIO_PCI_INTX_IRQ_INDEX) - vgpu_dev->intr_info.intx_file = irqfd.file; - else if (index == VFIO_PCI_MSI_IRQ_INDEX) - vgpu_dev->intr_info.msi_file = irqfd.file; + vgpu_dev->intr_info.intx_evtfd = evt; + else if (index == VFIO_PCI_MSI_IRQ_INDEX) + vgpu_dev->intr_info.msi_evtfd = evt; vgpu_dev->intr_info.index = index; - fdput(irqfd); return 0; } @@ -2500,11 +2516,8 @@ static int vgpu_save_fd(vgpu_dev_t *vgpu_dev, int fd, NvU32 index) static irqreturn_t vgpu_msix_handler(int irq, void *arg) { vgpu_dev_t *vgpu_dev = (vgpu_dev_t *)arg; - struct file *pfile = NULL; - mm_segment_t old_fs; - NvU64 val = 1; + struct eventfd_ctx *evt = NULL; int ret = 0; - loff_t offset = 0; int i; unsigned long eflags; @@ -2512,21 +2525,16 @@ static irqreturn_t vgpu_msix_handler(int irq, void *arg) { if (vgpu_dev->intr_info.allocated_irq[i] == irq) { - pfile = vgpu_dev->intr_info.msix_fd[i].file; + evt = vgpu_dev->intr_info.msix_evtfd[i]; break; } } - if (pfile && pfile->f_op && pfile->f_op->write) + if (evt) { - old_fs = get_fs(); - set_fs(KERNEL_DS); - NV_SAVE_FLAGS(eflags); - ret = pfile->f_op->write(pfile, (char *)&val, sizeof(val), &offset); + ret = eventfd_signal(evt, 1); NV_RESTORE_FLAGS(eflags); - - set_fs(old_fs); } return IRQ_HANDLED; @@ -2537,23 +2545,24 @@ static int vgpu_msix_set_vector_signal(vgpu_dev_t *vgpu_dev, { struct pci_dev *pdev; int irq = INVALID_IRQ, ret; - struct fd irqfd; + struct eventfd_ctx *evt; pdev = to_pci_dev(NV_GET_MDEV_PARENT(vgpu_dev->mdev)); - if (vgpu_dev->intr_info.msix_fd[vector].file) + if (vgpu_dev->intr_info.msix_evtfd[vector]) { free_irq(vgpu_dev->intr_info.allocated_irq[vector], vgpu_dev); - vgpu_dev->intr_info.msix_fd[vector].file = NULL; + eventfd_ctx_put(vgpu_dev->intr_info.msix_evtfd[vector]); + vgpu_dev->intr_info.msix_evtfd[vector] = NULL; vgpu_dev->intr_info.allocated_irq[vector] = INVALID_IRQ; } if (fd < 0) return 0; - irqfd = fdget(fd); - if (!irqfd.file) - return -EBADF; + evt = eventfd_ctx_fdget(fd); + if (IS_ERR(evt)) + return PTR_ERR(evt); if (vector < 0 || vector >= vgpu_dev->intr_info.num_ctx) return -EINVAL; @@ -2569,7 +2578,7 @@ static int vgpu_msix_set_vector_signal(vgpu_dev_t *vgpu_dev, vgpu_dev->intr_info.allocated_irq[vector] = irq; - vgpu_dev->intr_info.msix_fd[vector]= irqfd; + vgpu_dev->intr_info.msix_evtfd[vector]= evt; return 0; } @@ -2586,7 +2595,12 @@ static void vgpu_msix_disable(vgpu_dev_t *vgpu_dev) if (vgpu_dev->intr_info.allocated_irq[i] != INVALID_IRQ) { free_irq(vgpu_dev->intr_info.allocated_irq[i], vgpu_dev); - vgpu_dev->intr_info.msix_fd[i].file = NULL; + + if (vgpu_dev->intr_info.msix_evtfd[i]) { + eventfd_ctx_put(vgpu_dev->intr_info.msix_evtfd[i]); + vgpu_dev->intr_info.msix_evtfd[i] = NULL; + } + vgpu_dev->intr_info.allocated_irq[i] = INVALID_IRQ; } } @@ -2675,7 +2689,10 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags, { if (flags & VFIO_IRQ_SET_DATA_NONE) { - vgpu_dev->intr_info.intx_file = NULL; + if (vgpu_dev->intr_info.intx_evtfd) { + eventfd_ctx_put(vgpu_dev->intr_info.intx_evtfd); + vgpu_dev->intr_info.intx_evtfd = NULL; + } break; } @@ -2700,7 +2717,10 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags, { if (flags & VFIO_IRQ_SET_DATA_NONE) { - vgpu_dev->intr_info.msi_file = NULL; + if (vgpu_dev->intr_info.msi_evtfd) { + eventfd_ctx_put(vgpu_dev->intr_info.msi_evtfd); + vgpu_dev->intr_info.msi_evtfd = NULL; + } vgpu_dev->intr_info.index = VFIO_PCI_INTX_IRQ_INDEX; break; } @@ -2708,10 +2728,9 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags, if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int fd = *(int *)data; - if (fd > 0) + if (fd > 0 && !vgpu_dev->intr_info.msi_evtfd) { - if (vgpu_dev->intr_info.msi_file == NULL) - ret = vgpu_save_fd(vgpu_dev, fd, index); + ret = vgpu_save_fd(vgpu_dev, fd, index); } } break; @@ -2766,12 +2785,9 @@ exit: NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef) { - mm_segment_t old_fs; - NvU64 val = 1; int ret = 0; - loff_t offset = 0; NV_STATUS status = NV_OK; - struct file *pfile = NULL; + struct eventfd_ctx *evt = NULL; vgpu_dev_t *vgpu_dev = vgpuRef; unsigned long eflags; @@ -2780,12 +2796,12 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef) NV_SPIN_LOCK_IRQSAVE(&vgpu_dev->intr_info_lock, eflags); - if ((vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX) && (vgpu_dev->intr_info.msi_file == NULL)) + if ((vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX) && (!vgpu_dev->intr_info.msi_evtfd)) { NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags); return NV_ERR_INVALID_REQUEST; } - else if ((vgpu_dev->intr_info.index == VFIO_PCI_INTX_IRQ_INDEX) && (vgpu_dev->intr_info.intx_file == NULL)) + else if ((vgpu_dev->intr_info.index == VFIO_PCI_INTX_IRQ_INDEX) && (!vgpu_dev->intr_info.intx_evtfd)) { NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags); return NV_ERR_INVALID_REQUEST; @@ -2797,9 +2813,9 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef) } if (vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX) - pfile = vgpu_dev->intr_info.msi_file; + evt = vgpu_dev->intr_info.msi_evtfd; else - pfile = vgpu_dev->intr_info.intx_file; + evt = vgpu_dev->intr_info.intx_evtfd; // QEMU has exited. So, safe to ignore interrupts. if (vgpu_dev->intr_info.ignore_interrupts == NV_TRUE) @@ -2809,19 +2825,14 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef) } NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags); - old_fs = get_fs(); - set_fs(KERNEL_DS); - - if (pfile->f_op && pfile->f_op->write) - ret = pfile->f_op->write(pfile, (char *)&val, sizeof(val), &offset); - else - status = NV_ERR_INVALID_REQUEST; + if (evt) + ret = eventfd_signal(evt, 1); + else + status = NV_ERR_INVALID_REQUEST; if (ret < 0) status = NV_ERR_INVALID_STATE; - set_fs(old_fs); - return status; } @@ -4165,6 +4176,6 @@ static void __exit nv_vgpu_vfio_exit(void) module_init(nv_vgpu_vfio_init); module_exit(nv_vgpu_vfio_exit); -MODULE_LICENSE("MIT"); +MODULE_LICENSE("GPL"); MODULE_INFO(supported, "external"); MODULE_VERSION(NV_VERSION_STRING); --- ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.h +++ ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.h @@ -51,7 +51,7 @@ static NV_STATUS nv_vgpu_probe(struct pci_dev *dev, NvU32, NvU32 *); static NV_STATUS nv_vgpu_vfio_validate_map_request(struct mdev_device *, loff_t, NvU64 *, NvU64 *, NvU64 *, pgprot_t *, NvBool *); static void nv_vgpu_remove(struct pci_dev *); -static int nv_vgpu_vfio_create(struct kobject *, struct mdev_device *); +static int nv_vgpu_vfio_create(struct mdev_device *); static int nv_vgpu_vfio_destroy(struct mdev_device *mdev); static int nv_vgpu_vfio_open(struct mdev_device *); static void nv_vgpu_vfio_close(struct mdev_device *); @@ -293,19 +293,20 @@ typedef struct typedef struct { - struct file *intx_file; - struct file *msi_file; + struct eventfd_ctx *intx_evtfd; + struct eventfd_ctx *msi_evtfd; int index; NvBool ignore_interrupts; NvU32 allocated_irq[MAX_NUM_VECTORS]; NvU32 num_ctx; #if defined(NV_VGPU_KVM_BUILD) - struct fd msix_fd[MAX_NUM_VECTORS]; + struct eventfd_ctx *msix_evtfd[MAX_NUM_VECTORS]; #endif } intr_info_t; + typedef struct { NvU64 pending; --- ./kernel/nvidia/nv-frontend.c +++ ./kernel/nvidia/nv-frontend.c @@ -15,7 +15,7 @@ #include "nv-frontend.h" #if defined(MODULE_LICENSE) -MODULE_LICENSE("NVIDIA"); +MODULE_LICENSE("GPL"); #endif #if defined(MODULE_INFO) MODULE_INFO(supported, "external");