tu: Create drm fd per logical device

The main reason is to simplify BO managment when
bufferDeviceAddressCaptureReplay would be enabled.

Having to track some BO information in physical device and some
info in logical device gets challenging when BOs are shared
between logical devices.

Other benefits:
- Isolation from hangs in other logical devices;
- Each logical device limited only by its own address space size.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18254>
This commit is contained in:
Danylo Piliaiev
2022-10-11 15:43:31 +02:00
committed by Marge Bot
parent 0d9ceeee3f
commit 3a8fac0ccd
6 changed files with 96 additions and 32 deletions
+9 -1
View File
@@ -2207,7 +2207,13 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->instance = physical_device->instance;
device->physical_device = physical_device;
device->fd = physical_device->local_fd;
result = tu_drm_device_init(device);
if (result != VK_SUCCESS) {
vk_free(&device->vk.alloc, device);
return result;
}
device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
device->vk.check_status = tu_device_check_status;
@@ -2532,6 +2538,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
tu_bo_suballocator_finish(&device->pipeline_suballoc);
tu_bo_suballocator_finish(&device->autotune_suballoc);
tu_drm_device_finish(device);
util_sparse_array_finish(&device->bo_map);
u_rwlock_destroy(&device->dma_bo_lock);
+4 -3
View File
@@ -73,6 +73,7 @@ struct tu_physical_device
struct wsi_device wsi_device;
char fd_path[20];
int local_fd;
bool has_local;
int64_t local_major;
@@ -106,9 +107,6 @@ struct tu_physical_device
int msm_major_version;
int msm_minor_version;
/* Address space and global fault count for this local_fd with DRM backend */
uint64_t fault_count;
/* with 0 being the highest priority */
uint32_t submitqueue_priority_count;
@@ -345,6 +343,9 @@ struct tu_device
uint32_t submit_count;
/* Address space and global fault count for this local_fd with DRM backend */
uint64_t fault_count;
struct u_trace_context trace_context;
#ifdef HAVE_PERFETTO
+15
View File
@@ -65,6 +65,18 @@ void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
dev->instance->knl->bo_allow_dump(dev, bo);
}
VkResult
tu_drm_device_init(struct tu_device *dev)
{
return dev->instance->knl->device_init(dev);
}
void
tu_drm_device_finish(struct tu_device *dev)
{
dev->instance->knl->device_finish(dev);
}
int
tu_device_get_gpu_timestamp(struct tu_device *dev,
uint64_t *ts)
@@ -209,6 +221,9 @@ tu_physical_device_try_create(struct vk_instance *vk_instance,
device->master_fd = master_fd;
assert(strlen(path) < ARRAY_SIZE(device->fd_path));
snprintf(device->fd_path, ARRAY_SIZE(device->fd_path), "%s", path);
struct stat st;
if (stat(primary_path, &st) == 0) {
+8
View File
@@ -54,6 +54,8 @@ struct tu_bo {
struct tu_knl {
const char *name;
VkResult (*device_init)(struct tu_device *dev);
void (*device_finish)(struct tu_device *dev);
int (*device_get_gpu_timestamp)(struct tu_device *dev, uint64_t *ts);
int (*device_get_suspend_count)(struct tu_device *dev, uint64_t *suspend_count);
VkResult (*device_check_status)(struct tu_device *dev);
@@ -142,6 +144,12 @@ tu_physical_device_try_create(struct vk_instance *vk_instance,
struct _drmDevice *drm_device,
struct vk_physical_device **out);
VkResult
tu_drm_device_init(struct tu_device *dev);
void
tu_drm_device_finish(struct tu_device *dev);
int
tu_device_get_gpu_timestamp(struct tu_device *dev,
uint64_t *ts);
+45 -28
View File
@@ -51,9 +51,7 @@ struct tu_u_trace_syncobj
};
static int
tu_drm_get_param(const struct tu_physical_device *dev,
uint32_t param,
uint64_t *value)
tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
{
/* Technically this requires a pipe, but the kernel only supports one pipe
* anyway at the time of writing and most of these are clearly pipe
@@ -63,8 +61,7 @@ tu_drm_get_param(const struct tu_physical_device *dev,
.param = param,
};
int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req,
sizeof(req));
int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
if (ret)
return ret;
@@ -77,7 +74,7 @@ static int
tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
{
uint64_t value;
int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value);
int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GPU_ID, &value);
if (ret)
return ret;
@@ -89,7 +86,7 @@ static int
tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
{
uint64_t value;
int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value);
int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_SIZE, &value);
if (ret)
return ret;
@@ -100,7 +97,7 @@ tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
static int
tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
{
return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base);
return tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_BASE, base);
}
static int
@@ -108,13 +105,13 @@ tu_drm_get_va_prop(const struct tu_physical_device *dev,
uint64_t *va_start, uint64_t *va_size)
{
uint64_t value;
int ret = tu_drm_get_param(dev, MSM_PARAM_VA_START, &value);
int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
if (ret)
return ret;
*va_start = value;
ret = tu_drm_get_param(dev, MSM_PARAM_VA_SIZE, &value);
ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);
if (ret)
return ret;
@@ -127,7 +124,7 @@ static uint32_t
tu_drm_get_priorities(const struct tu_physical_device *dev)
{
uint64_t val = 1;
tu_drm_get_param(dev, MSM_PARAM_PRIORITIES, &val);
tu_drm_get_param(dev->local_fd, MSM_PARAM_PRIORITIES, &val);
assert(val >= 1);
return val;
@@ -152,30 +149,57 @@ tu_drm_is_memory_type_supported(int fd, uint32_t flags)
return true;
}
static VkResult
msm_device_init(struct tu_device *dev)
{
int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
if (fd < 0) {
return vk_startup_errorf(
dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to open device %s", dev->physical_device->fd_path);
}
int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
if (ret != 0) {
close(fd);
return vk_startup_errorf(dev->physical_device->instance,
VK_ERROR_INITIALIZATION_FAILED,
"Failed to get initial fault count: %d", ret);
}
dev->fd = fd;
return VK_SUCCESS;
}
static void
msm_device_finish(struct tu_device *dev)
{
close(dev->fd);
}
static int
msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
return tu_drm_get_param(dev->physical_device, MSM_PARAM_TIMESTAMP, ts);
return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);
}
static int
msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
{
int ret = tu_drm_get_param(dev->physical_device, MSM_PARAM_SUSPENDS, suspend_count);
int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);
return ret;
}
static VkResult
msm_device_check_status(struct tu_device *device)
{
struct tu_physical_device *physical_device = device->physical_device;
uint64_t last_fault_count = physical_device->fault_count;
int ret = tu_drm_get_param(physical_device, MSM_PARAM_FAULTS, &physical_device->fault_count);
uint64_t last_fault_count = device->fault_count;
int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
if (ret != 0)
return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);
if (last_fault_count != physical_device->fault_count)
if (last_fault_count != device->fault_count)
return vk_device_set_lost(&device->vk, "GPU faulted or hung");
return VK_SUCCESS;
@@ -1298,6 +1322,8 @@ msm_queue_submit(struct tu_queue *queue, struct vk_queue_submit *submit)
static const struct tu_knl msm_knl_funcs = {
.name = "msm",
.device_init = msm_device_init,
.device_finish = msm_device_finish,
.device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
.device_get_suspend_count = msm_device_get_suspend_count,
.device_check_status = msm_device_check_status,
@@ -1332,7 +1358,6 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
struct tu_physical_device **out)
{
VkResult result = VK_SUCCESS;
int ret;
/* Version 1.6 added SYNCOBJ support. */
const int min_version_major = 1;
@@ -1369,7 +1394,7 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
goto fail;
}
if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"could not get CHIP ID");
goto fail;
@@ -1413,14 +1438,6 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
}
#endif
ret = tu_drm_get_param(device, MSM_PARAM_FAULTS, &device->fault_count);
if (ret != 0) {
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"Failed to get initial fault count: %d", ret);
goto fail;
}
device->submitqueue_priority_count = tu_drm_get_priorities(device);
device->syncobj_type = vk_drm_syncobj_get_type(fd);
+15
View File
@@ -1150,6 +1150,19 @@ kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *synco
return VK_SUCCESS;
}
static VkResult
kgsl_device_init(struct tu_device *dev)
{
dev->fd = dev->physical_device->local_fd;
return VK_SUCCESS;
}
static void
kgsl_device_finish(struct tu_device *dev)
{
/* No-op */
}
static int
kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
@@ -1193,6 +1206,8 @@ kgsl_device_check_status(struct tu_device *device)
static const struct tu_knl kgsl_knl_funcs = {
.name = "kgsl",
.device_init = kgsl_device_init,
.device_finish = kgsl_device_finish,
.device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
.device_get_suspend_count = kgsl_device_get_suspend_count,
.device_check_status = kgsl_device_check_status,