Diffstat (limited to 'src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c')
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 1122
1 files changed, 1122 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
new file mode 100644
index 0000000000..194814e8fb
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@@ -0,0 +1,1122 @@
+/**************************************************************************
+ *
+ * Copyright © 2007 Red Hat Inc.
+ * Copyright © 2007 Intel Corporation
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ *          Eric Anholt <eric@anholt.net>
+ *          Dave Airlie <airlied@linux.ie>
+ */
+
+#include <xf86drm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "errno.h"
+#include "mtypes.h"
+#include "dri_bufmgr.h"
+#include "string.h"
+#include "imports.h"
+
+#include "i915_drm.h"
+
+#include "intel_bufmgr_ttm.h"
+#ifdef TTM_API
+
+#define DBG(...) do {                           \
+    if (bufmgr_ttm->bufmgr.debug)               \
+        fprintf(stderr, __VA_ARGS__);           \
+} while (0)
+
+/*
+ * These bits are always specified in each validation
+ * request. Other bits are not supported at this point
+ * as it would require a bit of investigation to figure
+ * out what mask value should be used.
+ */
+#define INTEL_BO_MASK  (DRM_BO_MASK_MEM |       \
+                        DRM_BO_FLAG_READ |      \
+                        DRM_BO_FLAG_WRITE |     \
+                        DRM_BO_FLAG_EXE)
+
+struct intel_validate_entry {
+    dri_bo *bo;
+    struct drm_i915_op_arg bo_arg;
+};
+
+struct dri_ttm_bo_bucket_entry {
+    drmBO drm_bo;
+    struct dri_ttm_bo_bucket_entry *next;
+};
+
+struct dri_ttm_bo_bucket {
+    struct dri_ttm_bo_bucket_entry *head;
+    struct dri_ttm_bo_bucket_entry **tail;
+    /**
+     * Limit on the number of entries in this bucket.
+     *
+     * 0 means that caching at this bucket size is disabled.
+     * -1 means that there is no limit to caching at this size.
+     */
+    int max_entries;
+    int num_entries;
+};
+
+/* Arbitrarily chosen: with 16 buckets, the largest size we'll cache for
+ * reuse is 1 << 15 pages, or 128MB.
+ */
+#define INTEL_TTM_BO_BUCKETS 16
+typedef struct _dri_bufmgr_ttm {
+    dri_bufmgr bufmgr;
+
+    int fd;
+    unsigned int fence_type;
+    unsigned int fence_type_flush;
+
+    uint32_t max_relocs;
+
+    struct intel_validate_entry *validate_array;
+    int validate_array_size;
+    int validate_count;
+
+    /** Array of lists of cached drmBOs of power-of-two sizes */
+    struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
+} dri_bufmgr_ttm;
+
+/**
+ * Private information associated with a relocation that isn't already stored
+ * in the relocation buffer to be passed to the kernel.
+ */
+struct dri_ttm_reloc {
+    dri_bo *target_buf;
+    uint64_t validate_flags;
+    /** Offset of target_buf after last execution of this relocation entry. */
+    unsigned int last_target_offset;
+};
+
+typedef struct _dri_bo_ttm {
+    dri_bo bo;
+
+    int refcount;
+    unsigned int map_count;
+    drmBO drm_bo;
+    const char *name;
+
+    uint64_t last_flags;
+
+    /**
+     * Index of the buffer within the validation list while preparing a
+     * batchbuffer execution.
+     */
+    int validate_index;
+
+    /** DRM buffer object containing relocation list */
+    uint32_t *reloc_buf_data;
+    struct dri_ttm_reloc *relocs;
+
+    /**
+     * Indicates that the buffer may be shared with other processes, so we
+     * can't hold maps beyond when the user does.
+     */
+    GLboolean shared;
+
+    GLboolean delayed_unmap;
+    /* Virtual address from the dri_bo_map whose unmap was delayed. */
+    void *saved_virtual;
+} dri_bo_ttm;
+
+typedef struct _dri_fence_ttm
+{
+    dri_fence fence;
+
+    int refcount;
+    const char *name;
+    drmFence drm_fence;
+} dri_fence_ttm;
+
+static int
+logbase2(int n)
+{
+    GLint i = 1;
+    GLint log2 = 0;
+
+    while (n > i) {
+        i *= 2;
+        log2++;
+    }
+
+    return log2;
+}
+
+static struct dri_ttm_bo_bucket *
+dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
+{
+    int i;
+
+    /* We only do buckets in power of two increments */
+    if ((size & (size - 1)) != 0)
+        return NULL;
+
+    /* We should only see sizes rounded to pages. */
+    assert((size % 4096) == 0);
+
+    /* We always allocate in units of pages */
+    i = ffs(size / 4096) - 1;
+    if (i >= INTEL_TTM_BO_BUCKETS)
+        return NULL;
+
+    return &bufmgr_ttm->cache_bucket[i];
+}
+
+
+static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
+{
+    int i, j;
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+        dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+        dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+
+        if (bo_ttm->reloc_buf_data != NULL) {
+            for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) {
+                uint32_t *reloc_entry = bo_ttm->reloc_buf_data +
+                    I915_RELOC_HEADER +
+                    j * I915_RELOC0_STRIDE;
+                dri_bo *target_bo = bo_ttm->relocs[j].target_buf;
+                dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo;
+
+                DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n",
+                    i,
+                    bo_ttm->name, reloc_entry[0],
+                    target_ttm->name, target_bo->offset,
+                    reloc_entry[1]);
+            }
+        } else {
+            DBG("%2d: %s\n", i, bo_ttm->name);
+        }
+    }
+}
+
+/**
+ * Adds the given buffer to the list of buffers to be validated (moved into the
+ * appropriate memory type) with the next batch submission.
+ *
+ * If a buffer is validated multiple times in a batch submission, it ends up
+ * with the intersection of the memory type flags and the union of the
+ * access flags.
+ */
+static void
+intel_add_validate_buffer(dri_bo *buf,
+                          uint64_t flags)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing,
+     * do that now.
+     */
+    if (ttm_buf->delayed_unmap) {
+        drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+        ttm_buf->delayed_unmap = GL_FALSE;
+    }
+
+    if (ttm_buf->validate_index == -1) {
+        struct intel_validate_entry *entry;
+        struct drm_i915_op_arg *arg;
+        struct drm_bo_op_req *req;
+        int index;
+
+        /* Extend the array of validation entries as necessary. */
+        if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) {
+            int i, new_size = bufmgr_ttm->validate_array_size * 2;
+
+            if (new_size == 0)
+                new_size = 5;
+
+            bufmgr_ttm->validate_array =
+                realloc(bufmgr_ttm->validate_array,
+                        sizeof(struct intel_validate_entry) * new_size);
+            bufmgr_ttm->validate_array_size = new_size;
+
+            /* Update pointers for realloced mem. */
+            for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) {
+                bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long)
+                    &bufmgr_ttm->validate_array[i + 1].bo_arg;
+            }
+        }
+
+        /* Pick out the new array entry for ourselves */
+        index = bufmgr_ttm->validate_count;
+        ttm_buf->validate_index = index;
+        entry = &bufmgr_ttm->validate_array[index];
+        bufmgr_ttm->validate_count++;
+
+        /* Fill in array entry */
+        entry->bo = buf;
+        dri_bo_reference(buf);
+
+        /* Fill in kernel arg */
+        arg = &entry->bo_arg;
+        req = &arg->d.req;
+
+        memset(arg, 0, sizeof(*arg));
+        req->bo_req.handle = ttm_buf->drm_bo.handle;
+        req->op = drm_bo_validate;
+        req->bo_req.flags = flags;
+        req->bo_req.hint = 0;
+#ifdef DRM_BO_HINT_PRESUMED_OFFSET
+        /* PRESUMED_OFFSET indicates that all relocations pointing at this
+         * buffer have the correct offset.  If any of our relocations don't,
+         * this flag will be cleared off the buffer later in the relocation
+         * processing.
+         */
+        req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
+        req->bo_req.presumed_offset = buf->offset;
+#endif
+        req->bo_req.mask = INTEL_BO_MASK;
+        req->bo_req.fence_class = 0; /* Backwards compat. */
+
+        if (ttm_buf->reloc_buf_data != NULL)
+            arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data;
+        else
+            arg->reloc_ptr = 0;
+
+        /* Hook up the linked list of args for the kernel */
+        arg->next = 0;
+        if (index != 0) {
+            bufmgr_ttm->validate_array[index - 1].bo_arg.next =
+                (unsigned long)arg;
+        }
+    } else {
+        struct intel_validate_entry *entry =
+            &bufmgr_ttm->validate_array[ttm_buf->validate_index];
+        struct drm_i915_op_arg *arg = &entry->bo_arg;
+        struct drm_bo_op_req *req = &arg->d.req;
+        uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM;
+        uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM;
+
+        /* Buffer was already in the validate list.  Extend its flags as
+         * necessary.
+         */
+
+        if (memFlags == 0) {
+            fprintf(stderr,
+                    "%s: No shared memory types between "
+                    "0x%16llx and 0x%16llx\n",
+                    __FUNCTION__,
+                    (unsigned long long)req->bo_req.flags,
+                    (unsigned long long)flags);
+            abort();
+        }
+        if (flags & ~INTEL_BO_MASK) {
+            fprintf(stderr,
+                    "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n",
+                    __FUNCTION__, (unsigned long long)(flags & ~INTEL_BO_MASK));
+            abort();
+        }
+        req->bo_req.flags = memFlags | modeFlags;
+    }
+}
+
+
+#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
+                           sizeof(uint32_t))
+
+static int
+intel_setup_reloc_list(dri_bo *bo)
+{
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
+
+    bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs,
+                            sizeof(struct dri_ttm_reloc));
+    bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs));
+
+    /* Initialize the relocation list with the header:
+     * DWORD 0: relocation count
+     * DWORD 1: relocation type
+     * DWORD 2+3: handle to next relocation list (currently none) 64-bits
+     */
+    bo_ttm->reloc_buf_data[0] = 0;
+    bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0;
+    bo_ttm->reloc_buf_data[2] = 0;
+    bo_ttm->reloc_buf_data[3] = 0;
+
+    return 0;
+}
+
+#if 0
+int
+driFenceSignaled(DriFenceObject * fence, unsigned type)
+{
+    int signaled;
+    int ret;
+
+    if (fence == NULL)
+        return GL_TRUE;
+
+    ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
+    BM_CKFATAL(ret);
+    return signaled;
+}
+#endif
+
+static dri_bo *
+dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
+              unsigned long size, unsigned int alignment,
+              uint64_t location_mask)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_bo_ttm *ttm_buf;
+    unsigned int pageSize = getpagesize();
+    int ret;
+    uint64_t flags;
+    unsigned int hint;
+    unsigned long alloc_size;
+    struct dri_ttm_bo_bucket *bucket;
+    GLboolean alloc_from_cache = GL_FALSE;
+
+    ttm_buf = calloc(1, sizeof(*ttm_buf));
+    if (!ttm_buf)
+        return NULL;
+
+    /* The mask argument doesn't do anything for us that we want other than
+     * determine which pool (TTM or local) the buffer is allocated into, so
+     * just pass all of the allocation class flags.
+     */
+    flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE |
+        DRM_BO_FLAG_EXE;
+    /* No hints we want to use. */
+    hint = 0;
+
+    /* Round the allocated size up to a power of two number of pages. */
+    alloc_size = 1 << logbase2(size);
+    if (alloc_size < pageSize)
+        alloc_size = pageSize;
+    bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
+
+    /* If we don't have caching at this size, don't actually round the
+     * allocation up.
+     */
+    if (bucket == NULL || bucket->max_entries == 0)
+        alloc_size = size;
+
+    /* Get a buffer out of the cache if available */
+    if (bucket != NULL && bucket->num_entries > 0) {
+        struct dri_ttm_bo_bucket_entry *entry = bucket->head;
+        int busy;
+
+        /* Check if the buffer is still in flight.  If not, reuse it. */
+        ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
+        alloc_from_cache = (ret == 0 && busy == 0);
+
+        if (alloc_from_cache) {
+            bucket->head = entry->next;
+            if (entry->next == NULL)
+                bucket->tail = &bucket->head;
+            bucket->num_entries--;
+
+            ttm_buf->drm_bo = entry->drm_bo;
+            free(entry);
+        }
+    }
+
+    if (!alloc_from_cache) {
+        ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
+                          NULL, flags, hint, &ttm_buf->drm_bo);
+        if (ret != 0) {
+            free(ttm_buf);
+            return NULL;
+        }
+    }
+
+    ttm_buf->bo.size = size;
+    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
+    ttm_buf->bo.virtual = NULL;
+    ttm_buf->bo.bufmgr = bufmgr;
+    ttm_buf->name = name;
+    ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf_data = NULL;
+    ttm_buf->relocs = NULL;
+    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
+    ttm_buf->shared = GL_FALSE;
+    ttm_buf->delayed_unmap = GL_FALSE;
+    ttm_buf->validate_index = -1;
+
+    DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size);
+
+    return &ttm_buf->bo;
+}
+
+/* Our TTM backend doesn't allow creation of static buffers, as that requires
+ * privilege for the non-fake case, and the lock in the fake case where we
+ * were working around the X Server not creating buffers and passing handles
+ * to us.
+ */
+static dri_bo *
+dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name,
+                     unsigned long offset, unsigned long size, void *virtual,
+                     uint64_t location_mask)
+{
+    return NULL;
+}
+
+/**
+ * Returns a dri_bo wrapping the given buffer object handle.
+ *
+ * This can be used when one application needs to pass a buffer object
+ * to another.
+ */
+dri_bo *
+intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+                                unsigned int handle)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_bo_ttm *ttm_buf;
+    int ret;
+
+    ttm_buf = calloc(1, sizeof(*ttm_buf));
+    if (!ttm_buf)
+        return NULL;
+
+    ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
+    if (ret != 0) {
+        fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
+                name, handle, strerror(-ret));
+        free(ttm_buf);
+        return NULL;
+    }
+    ttm_buf->bo.size = ttm_buf->drm_bo.size;
+    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
+    ttm_buf->bo.virtual = NULL;
+    ttm_buf->bo.bufmgr = bufmgr;
+    ttm_buf->name = name;
+    ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf_data = NULL;
+    ttm_buf->relocs = NULL;
+    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
+    ttm_buf->shared = GL_TRUE;
+    ttm_buf->delayed_unmap = GL_FALSE;
+    ttm_buf->validate_index = -1;
+
+    DBG("bo_create_from_handle: %p %08x (%s)\n",
+        &ttm_buf->bo, handle, ttm_buf->name);
+
+    return &ttm_buf->bo;
+}
+
+static void
+dri_ttm_bo_reference(dri_bo *buf)
+{
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    ttm_buf->refcount++;
+}
+
+static void
+dri_ttm_bo_unreference(dri_bo *buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    /* Check for NULL before looking up the bufmgr through buf. */
+    if (!buf)
+        return;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+
+    if (--ttm_buf->refcount == 0) {
+        struct dri_ttm_bo_bucket *bucket;
+        int ret;
+
+        assert(ttm_buf->map_count == 0);
+
+        if (ttm_buf->reloc_buf_data) {
+            int i;
+
+            /* Unreference all the target buffers */
+            for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++)
+                dri_bo_unreference(ttm_buf->relocs[i].target_buf);
+            free(ttm_buf->relocs);
+
+            /* Free the kernel BO containing relocation entries */
+            free(ttm_buf->reloc_buf_data);
+            ttm_buf->reloc_buf_data = NULL;
+        }
+
+        if (ttm_buf->delayed_unmap) {
+            int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+
+            if (ret != 0) {
+                fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
+                        __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+            }
+        }
+
+        bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
+        /* Put the buffer into our internal cache for reuse if we can. */
+        if (!ttm_buf->shared &&
+            bucket != NULL &&
+            (bucket->max_entries == -1 ||
+             (bucket->max_entries > 0 &&
+              bucket->num_entries < bucket->max_entries)))
+        {
+            struct dri_ttm_bo_bucket_entry *entry;
+
+            entry = calloc(1, sizeof(*entry));
+            entry->drm_bo = ttm_buf->drm_bo;
+
+            entry->next = NULL;
+            *bucket->tail = entry;
+            bucket->tail = &entry->next;
+            bucket->num_entries++;
+        } else {
+            /* Decrement the kernel refcount for the buffer. */
+            ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+            if (ret != 0) {
+                fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
+                        ttm_buf->name, strerror(-ret));
+            }
+        }
+
+        DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+        free(buf);
+        return;
+    }
+}
+
+static int
+dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+    uint64_t flags;
+    int ret;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+
+    flags = DRM_BO_FLAG_READ;
+    if (write_enable)
+        flags |= DRM_BO_FLAG_WRITE;
+
+    /* Allow recursive mapping.  Mesa may recursively map buffers with
+     * nested display loops.
+     */
+    if (ttm_buf->map_count++ != 0)
+        return 0;
+
+    assert(buf->virtual == NULL);
+
+    DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+    /* XXX: What about if we're upgrading from READ to WRITE? */
+    if (ttm_buf->delayed_unmap) {
+        buf->virtual = ttm_buf->saved_virtual;
+        return 0;
+    }
+
+    ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error mapping buffer %s: %s.\n",
+                __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+    }
+
+    return ret;
+}
+
+static int
+dri_ttm_bo_unmap(dri_bo *buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+    int ret;
+
+    if (buf == NULL)
+        return 0;
+
+    assert(ttm_buf->map_count != 0);
+    if (--ttm_buf->map_count != 0)
+        return 0;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+
+    assert(buf->virtual != NULL);
+
+    DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+    if (!ttm_buf->shared) {
+        ttm_buf->saved_virtual = buf->virtual;
+        ttm_buf->delayed_unmap = GL_TRUE;
+        buf->virtual = NULL;
+
+        return 0;
+    }
+
+    buf->virtual = NULL;
+
+    ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
+                __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+    }
+
+    return ret;
+}
+
+/**
+ * Returns a dri_fence wrapping the fence described by the given kernel
+ * fence arg.
+ *
+ * This can be used when one application needs to pass a fence
+ * to another.
+ */
+dri_fence *
+intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
+                                drm_fence_arg_t *arg)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_fence_ttm *ttm_fence;
+
+    ttm_fence = malloc(sizeof(*ttm_fence));
+    if (!ttm_fence)
+        return NULL;
+
+    ttm_fence->drm_fence.handle = arg->handle;
+    ttm_fence->drm_fence.fence_class = arg->fence_class;
+    ttm_fence->drm_fence.type = arg->type;
+    ttm_fence->drm_fence.flags = arg->flags;
+    ttm_fence->drm_fence.signaled = 0;
+    ttm_fence->drm_fence.sequence = arg->sequence;
+
+    ttm_fence->fence.bufmgr = bufmgr;
+    ttm_fence->name = name;
+    ttm_fence->refcount = 1;
+
+    DBG("fence_create_from_handle: %p (%s)\n",
+        &ttm_fence->fence, ttm_fence->name);
+
+    return &ttm_fence->fence;
+}
+
+
+static void
+dri_ttm_fence_reference(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+
+    ++fence_ttm->refcount;
+    DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+}
+
+static void
+dri_ttm_fence_unreference(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm;
+
+    /* Check for NULL before looking up the bufmgr through fence. */
+    if (!fence)
+        return;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+
+    DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+
+    if (--fence_ttm->refcount == 0) {
+        int ret;
+
+        ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence);
+        if (ret != 0) {
+            fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
+                    fence_ttm->name, strerror(-ret));
+        }
+
+        free(fence);
+        return;
+    }
+}
+
+static void
+dri_ttm_fence_wait(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+    int ret;
+
+    ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY,
+                       &fence_ttm->drm_fence, 0);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
+                __FILE__, __LINE__, fence_ttm->name, strerror(-ret));
+        abort();
+    }
+
+    DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+}
+
+static void
+dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
+
+    free(bufmgr_ttm->validate_array);
+
+    /* Free any cached buffer objects we were going to reuse */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+        struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
+        struct dri_ttm_bo_bucket_entry *entry;
+
+        while ((entry = bucket->head) != NULL) {
+            int ret;
+
+            bucket->head = entry->next;
+            if (entry->next == NULL)
+                bucket->tail = &bucket->head;
+            bucket->num_entries--;
+
+            /* Decrement the kernel refcount for the buffer. */
+            ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
+            if (ret != 0) {
+                fprintf(stderr, "drmBOUnreference failed: %s\n",
+                        strerror(-ret));
+            }
+
+            free(entry);
+        }
+    }
+
+    free(bufmgr);
+}
+
+/**
+ * Adds the target buffer to the validation list and adds the relocation
+ * to the reloc_buffer's relocation list.
+ *
+ * The relocation entry at the given offset must already contain the
+ * precomputed relocation value, because the kernel will optimize out
+ * the relocation entry write when the buffer hasn't moved from the
+ * last known offset in target_buf.
+ */
+static int
+dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
+                   GLuint offset, dri_bo *target_buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
+    dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
+    dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf;
+    int num_relocs;
+    uint32_t *this_reloc;
+
+    /* Create a new relocation list if needed */
+    if (reloc_buf_ttm->reloc_buf_data == NULL)
+        intel_setup_reloc_list(reloc_buf);
+
+    num_relocs = reloc_buf_ttm->reloc_buf_data[0];
+
+    /* Check overflow */
+    assert(num_relocs < bufmgr_ttm->max_relocs);
+
+    this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER +
+        num_relocs * I915_RELOC0_STRIDE;
+
+    this_reloc[0] = offset;
+    this_reloc[1] = delta;
+    this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */
+    this_reloc[3] = 0;
+
+    reloc_buf_ttm->relocs[num_relocs].validate_flags = flags;
+    reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf;
+    dri_bo_reference(target_buf);
+
+    reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */
+    /* Check wraparound */
+    assert(reloc_buf_ttm->reloc_buf_data[0] != 0);
+    return 0;
+}
+
+/**
+ * Walk the tree of relocations rooted at BO and accumulate the list of
+ * validations to be performed and update the relocation buffers with
+ * index values into the validation list.
+ */
+static void
+dri_ttm_bo_process_reloc(dri_bo *bo)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    unsigned int nr_relocs;
+    int i;
+
+    if (bo_ttm->reloc_buf_data == NULL)
+        return;
+
+    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
+
+    for (i = 0; i < nr_relocs; i++) {
+        struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
+
+        /* Continue walking the tree depth-first. */
+        dri_ttm_bo_process_reloc(r->target_buf);
+
+        /* Add the target to the validate list */
+        intel_add_validate_buffer(r->target_buf, r->validate_flags);
+
+        /* Clear the PRESUMED_OFFSET flag from the validate list entry of the
+         * target if this buffer has a stale relocated pointer at it.
+         */
+        if (r->last_target_offset != r->target_buf->offset) {
+            dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf;
+            struct intel_validate_entry *entry =
+                &bufmgr_ttm->validate_array[target_buf_ttm->validate_index];
+
+            entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET;
+        }
+    }
+}
+
+static void *
+dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
+
+    /* Update indices and set up the validate list. */
+    dri_ttm_bo_process_reloc(batch_buf);
+
+    /* Add the batch buffer to the validation list.  There are no relocations
+     * pointing to it.
+     */
+    intel_add_validate_buffer(batch_buf,
+                              DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
+
+    *count = bufmgr_ttm->validate_count;
+    return &bufmgr_ttm->validate_array[0].bo_arg;
+}
+
+static const char *
+intel_get_flags_mem_type_string(uint64_t flags)
+{
+    switch (flags & DRM_BO_MASK_MEM) {
+    case DRM_BO_FLAG_MEM_LOCAL: return "local";
+    case DRM_BO_FLAG_MEM_TT: return "ttm";
+    case DRM_BO_FLAG_MEM_VRAM: return "vram";
+    case DRM_BO_FLAG_MEM_PRIV0: return "priv0";
+    case DRM_BO_FLAG_MEM_PRIV1: return "priv1";
+    case DRM_BO_FLAG_MEM_PRIV2: return "priv2";
+    case DRM_BO_FLAG_MEM_PRIV3: return "priv3";
+    case DRM_BO_FLAG_MEM_PRIV4: return "priv4";
+    default: return NULL;
+    }
+}
+
+static const char *
+intel_get_flags_caching_string(uint64_t flags)
+{
+    switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) {
+    case 0: return "UU";
+    case DRM_BO_FLAG_CACHED: return "CU";
+    case DRM_BO_FLAG_CACHED_MAPPED: return "UC";
+    case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC";
+    default: return NULL;
+    }
+}
+
+static void
+intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
+{
+    int i;
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+        dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+        dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+        struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg;
+        struct drm_bo_arg_rep *rep = &arg->d.rep;
+
+        /* Update the flags */
+        if (rep->bo_info.flags != bo_ttm->last_flags) {
+            DBG("BO %s migrated: %s/%s -> %s/%s\n",
+                bo_ttm->name,
+                intel_get_flags_mem_type_string(bo_ttm->last_flags),
+                intel_get_flags_caching_string(bo_ttm->last_flags),
+                intel_get_flags_mem_type_string(rep->bo_info.flags),
+                intel_get_flags_caching_string(rep->bo_info.flags));
+
+            bo_ttm->last_flags = rep->bo_info.flags;
+        }
+        /* Update the buffer offset */
+        if (rep->bo_info.offset != bo->offset) {
+            DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n",
+                bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset);
+            bo->offset = rep->bo_info.offset;
+        }
+    }
+}
+
+/**
+ * Update the last target offset field of relocation entries for
+ * PRESUMED_OFFSET computation.
+ */
+static void
+dri_ttm_bo_post_submit(dri_bo *bo)
+{
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    unsigned int nr_relocs;
+    int i;
+
+    if (bo_ttm->reloc_buf_data == NULL)
+        return;
+
+    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
+
+    for (i = 0; i < nr_relocs; i++) {
+        struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
+
+        /* Continue walking the tree depth-first. */
+        dri_ttm_bo_post_submit(r->target_buf);
+
+        r->last_target_offset = r->target_buf->offset;
+    }
+}
+
+static void
+dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
+    int i;
+
+    intel_update_buffer_offsets (bufmgr_ttm);
+
+    dri_ttm_bo_post_submit(batch_buf);
+
+    if (bufmgr_ttm->bufmgr.debug)
+        dri_ttm_dump_validation_list(bufmgr_ttm);
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+        dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+        dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+
+        /* Disconnect the buffer from the validate list */
+        bo_ttm->validate_index = -1;
+        dri_bo_unreference(bo);
+        bufmgr_ttm->validate_array[i].bo = NULL;
+    }
+    bufmgr_ttm->validate_count = 0;
+}
+
+/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
+
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+        bufmgr_ttm->cache_bucket[i].max_entries = -1;
+    }
+}
+
+/*
+ * Stub: no aperture accounting is done for the TTM backend, so this
+ * always reports that space is available.
+ */
+static int
+dri_ttm_check_aperture_space(dri_bo *bo)
+{
+    return 0;
+}
+
+/**
+ * Initializes the TTM buffer manager, which uses the kernel to allocate,
+ * map, and manage buffer objects.
+ *
+ * \param fd File descriptor of the opened DRM device.
+ * \param fence_type Driver-specific fence type used for fences with no flush.
+ * \param fence_type_flush Driver-specific fence type used for fences with a
+ *        flush.
+ */
+dri_bufmgr *
+intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
+                      unsigned int fence_type_flush, int batch_size)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    int i;
+
+    bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
+    bufmgr_ttm->fd = fd;
+    bufmgr_ttm->fence_type = fence_type;
+    bufmgr_ttm->fence_type_flush = fence_type_flush;
+
+    /* Let's go with one relocation per every 2 dwords (but round down a bit
+     * since a power of two will mean an extra page allocation for the reloc
+     * buffer).
+     *
+     * Every 4 was too few for the blender benchmark.
+     */
+    bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
+
+    bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
+    bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
+    bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference;
+    bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference;
+    bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map;
+    bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap;
+    bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference;
+    bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference;
+    bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait;
+    bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy;
+    bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
+    bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
+    bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
+    bufmgr_ttm->bufmgr.debug = GL_FALSE;
+    bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space;
+
+    /* Initialize the linked lists for BO reuse cache. */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
+        bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
+
+    return &bufmgr_ttm->bufmgr;
+}
+#else
+dri_bufmgr *
+intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
+                      unsigned int fence_type_flush, int batch_size)
+{
+    return NULL;
+}
+
+dri_bo *
+intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+                                unsigned int handle)
+{
+    return NULL;
+}
+
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+}
+#endif
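
What follows is not part of the commit: a minimal usage sketch of the API this file exports, assuming an already-opened DRM fd. The fence-type arguments (0, 0), the 64KB batch size, and the "scratch" buffer name are illustrative placeholders, and error handling is abbreviated. Calls go through the function pointers that intel_bufmgr_ttm_init() fills in above.

#include <string.h>
#include "dri_bufmgr.h"
#include "intel_bufmgr_ttm.h"

static void example_usage(int fd)
{
    dri_bufmgr *bufmgr;
    dri_bo *bo;

    bufmgr = intel_bufmgr_ttm_init(fd, 0, 0, 65536);
    if (bufmgr == NULL)
        return; /* TTM_API not built in, or init failed */

    /* Opt in to the power-of-two bucket cache for freed buffers. */
    intel_ttm_enable_bo_reuse(bufmgr);

    /* Allocate a page-sized buffer in the TT pool. */
    bo = bufmgr->bo_alloc(bufmgr, "scratch", 4096, 4096, DRM_BO_FLAG_MEM_TT);
    if (bo != NULL) {
        if (bufmgr->bo_map(bo, GL_TRUE) == 0) {
            memset(bo->virtual, 0, 4096);
            bufmgr->bo_unmap(bo);   /* delayed for unshared buffers */
        }
        bufmgr->bo_unreference(bo); /* may land in a reuse bucket */
    }

    bufmgr->destroy(bufmgr);
}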
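
The bucket scheme used by dri_ttm_bo_bucket_for_size() can also be restated as a standalone sketch (again not code from the patch; it hard-codes the same 4096-byte page size and 16-bucket limit):

#include <strings.h> /* ffs() */

/* Bucket i caches buffers of exactly (1 << i) pages, so a 10000-byte
 * request, once dri_ttm_alloc() has rounded it up to 16384 bytes (four
 * pages), files under ffs(16384 / 4096) - 1 = bucket 2.  Returns -1 for
 * sizes the cache rejects: zero, non-power-of-two, not page-aligned, or
 * past the last bucket (1 << 15 pages, i.e. 128MB). */
static int bucket_index_for_size(unsigned long size)
{
    int i;

    if (size == 0 || (size & (size - 1)) != 0 || (size % 4096) != 0)
        return -1;
    i = ffs(size / 4096) - 1;
    return (i < 16) ? i : -1;
}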