Merge remote branch 'origin/master' into nv50-compiler

Conflicts:
	src/gallium/drivers/nv50/nv50_program.c
This commit is contained in:
Christoph Bumiller
2010-09-02 18:31:49 +02:00
1069 changed files with 116927 additions and 48446 deletions
+3
View File
@@ -5,6 +5,9 @@ if 'egl' in env['statetrackers']:
SConscript('egl/main/SConscript')
if 'mesa' in env['statetrackers']:
if platform == 'windows':
SConscript('talloc/SConscript')
SConscript('glsl/SConscript')
SConscript('mapi/glapi/SConscript')
SConscript('mesa/SConscript')
+1 -1
View File
@@ -14,7 +14,7 @@ Contact
Status
Preliminary - totally subject to change.
Obsolete.
Version
+223
View File
@@ -836,6 +836,7 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp,
goto cleanup_configs;
}
disp->Extensions.MESA_drm_image = EGL_TRUE;
disp->Extensions.KHR_image_base = EGL_TRUE;
disp->Extensions.KHR_image_pixmap = EGL_TRUE;
disp->Extensions.KHR_gl_renderbuffer_image = EGL_TRUE;
@@ -994,6 +995,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp,
for (i = 0; dri2_dpy->driver_configs[i]; i++)
dri2_add_config(disp, dri2_dpy->driver_configs[i], i + 1, 0, 0);
disp->Extensions.MESA_drm_image = EGL_TRUE;
disp->Extensions.KHR_image_base = EGL_TRUE;
disp->Extensions.KHR_gl_renderbuffer_image = EGL_TRUE;
disp->Extensions.KHR_gl_texture_2D_image = EGL_TRUE;
@@ -1620,6 +1622,96 @@ dri2_create_image_khr_renderbuffer(_EGLDisplay *disp, _EGLContext *ctx,
return &dri2_img->base;
}
/**
 * Create an EGL image that wraps an existing DRM buffer (the
 * EGL_DRM_BUFFER_MESA target of dri2_create_image_khr).
 *
 * disp       display the image is created on
 * ctx        context whose DRI context is passed to createImageFromName
 * buffer     client buffer handle; its value is used as the buffer name
 * attr_list  EGL_NONE-terminated attribute/value pairs, may be NULL.
 *            Recognized attributes: EGL_WIDTH, EGL_HEIGHT,
 *            EGL_DRM_BUFFER_FORMAT_MESA and EGL_DRM_BUFFER_STRIDE_MESA.
 *
 * Returns the base image on success.  On failure NULL is returned and an EGL
 * error is recorded: EGL_BAD_ATTRIBUTE for an unrecognized attribute,
 * EGL_BAD_PARAMETER for a missing/non-positive width, height or stride or an
 * unsupported format, EGL_BAD_ALLOC when allocation or the DRI image creation
 * fails.
 */
static _EGLImage *
dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
                                  EGLClientBuffer buffer,
                                  const EGLint *attr_list)
{
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   struct dri2_egl_image *dri2_img;
   EGLint width, height, format, name, stride, pitch, i, err;

   /* The buffer name travels inside the client buffer handle itself. */
   name = (EGLint) buffer;

   err = EGL_SUCCESS;
   width = 0;
   height = 0;
   format = 0;
   stride = 0;

   /* A NULL list is treated as an empty list; the zeroed defaults are then
    * rejected by the size check below instead of dereferencing NULL here. */
   if (attr_list) {
      for (i = 0; attr_list[i] != EGL_NONE; i++) {
         EGLint attr = attr_list[i++];
         EGLint val = attr_list[i];

         switch (attr) {
         case EGL_WIDTH:
            width = val;
            break;
         case EGL_HEIGHT:
            height = val;
            break;
         case EGL_DRM_BUFFER_FORMAT_MESA:
            format = val;
            break;
         case EGL_DRM_BUFFER_STRIDE_MESA:
            stride = val;
            break;
         default:
            err = EGL_BAD_ATTRIBUTE;
            break;
         }

         if (err != EGL_SUCCESS) {
            _eglLog(_EGL_WARNING, "bad image attribute 0x%04x", attr);
            /* Record the error so the caller does not see a failure paired
             * with EGL_SUCCESS. */
            _eglError(err, "dri2_create_image_mesa_drm_buffer");
            return NULL;
         }
      }
   }

   if (width <= 0 || height <= 0 || stride <= 0) {
      _eglError(EGL_BAD_PARAMETER,
                "bad width, height or stride");
      return NULL;
   }

   switch (format) {
   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
      format = __DRI_IMAGE_FORMAT_ARGB8888;
      pitch = stride;
      break;
   default:
      _eglError(EGL_BAD_PARAMETER,
                "dri2_create_image_mesa_drm_buffer: unsupported buffer format");
      return NULL;
   }

   dri2_img = malloc(sizeof *dri2_img);
   if (!dri2_img) {
      _eglError(EGL_BAD_ALLOC, "dri2_create_image_mesa_drm");
      return NULL;
   }

   if (!_eglInitImage(&dri2_img->base, disp, attr_list)) {
      free(dri2_img);
      return NULL;
   }

   dri2_img->dri_image =
      dri2_dpy->image->createImageFromName(dri2_ctx->dri_context,
                                           width,
                                           height,
                                           format,
                                           name,
                                           pitch,
                                           dri2_img);
   if (dri2_img->dri_image == NULL) {
      free(dri2_img);
      _eglError(EGL_BAD_ALLOC, "dri2_create_image_mesa_drm");
      return NULL;
   }

   return &dri2_img->base;
}
static _EGLImage *
dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
_EGLContext *ctx, EGLenum target,
@@ -1630,6 +1722,8 @@ dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
return dri2_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
case EGL_GL_RENDERBUFFER_KHR:
return dri2_create_image_khr_renderbuffer(disp, ctx, buffer, attr_list);
case EGL_DRM_BUFFER_MESA:
return dri2_create_image_mesa_drm_buffer(disp, ctx, buffer, attr_list);
default:
_eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
return EGL_NO_IMAGE_KHR;
@@ -1648,6 +1742,133 @@ dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
return EGL_TRUE;
}
/**
 * Driver hook for CreateDRMImageMESA: allocate a new DRI image described by
 * an attribute list.
 *
 * drv        unused here
 * disp       display the image is created on
 * attr_list  EGL_NONE-terminated attribute/value pairs.  Recognized
 *            attributes: EGL_WIDTH, EGL_HEIGHT, EGL_DRM_BUFFER_FORMAT_MESA
 *            and EGL_DRM_BUFFER_USE_MESA.
 *
 * Returns the base image on success.  On failure the partially built image is
 * freed, an EGL error is recorded and EGL_NO_IMAGE_KHR is returned:
 * EGL_BAD_ATTRIBUTE for an unrecognized attribute, EGL_BAD_PARAMETER for a
 * NULL list, non-positive size, unsupported format or unknown use bits, and
 * EGL_BAD_ALLOC when allocation or the DRI image creation fails.
 */
static _EGLImage *
dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
                           const EGLint *attr_list)
{
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_image *dri2_img;
   int width, height, format, i;
   unsigned int use, dri_use, valid_mask;
   EGLint err = EGL_SUCCESS;

   dri2_img = malloc(sizeof *dri2_img);
   if (!dri2_img) {
      _eglError(EGL_BAD_ALLOC, "dri2_create_image_khr");
      return EGL_NO_IMAGE_KHR;
   }

   if (!attr_list) {
      err = EGL_BAD_PARAMETER;
      goto cleanup_img;
   }

   if (!_eglInitImage(&dri2_img->base, disp, attr_list)) {
      err = EGL_BAD_PARAMETER;
      goto cleanup_img;
   }

   width = 0;
   height = 0;
   format = 0;
   use = 0;
   for (i = 0; attr_list[i] != EGL_NONE; i++) {
      EGLint attr = attr_list[i++];
      EGLint val = attr_list[i];

      switch (attr) {
      case EGL_WIDTH:
         width = val;
         break;
      case EGL_HEIGHT:
         height = val;
         break;
      case EGL_DRM_BUFFER_FORMAT_MESA:
         format = val;
         break;
      case EGL_DRM_BUFFER_USE_MESA:
         use = val;
         break;
      default:
         err = EGL_BAD_ATTRIBUTE;
         break;
      }

      if (err != EGL_SUCCESS) {
         _eglLog(_EGL_WARNING, "bad image attribute 0x%04x", attr);
         goto cleanup_img;
      }
   }

   /* Each validation failure below must set err before jumping to the
    * cleanup label; otherwise the failure would be reported as EGL_SUCCESS. */
   if (width <= 0 || height <= 0) {
      _eglLog(_EGL_WARNING, "bad width or height (%dx%d)", width, height);
      err = EGL_BAD_PARAMETER;
      goto cleanup_img;
   }

   switch (format) {
   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
      format = __DRI_IMAGE_FORMAT_ARGB8888;
      break;
   default:
      _eglLog(_EGL_WARNING, "bad image format value 0x%04x", format);
      err = EGL_BAD_PARAMETER;
      goto cleanup_img;
   }

   valid_mask =
      EGL_DRM_BUFFER_USE_SCANOUT_MESA |
      EGL_DRM_BUFFER_USE_SHARE_MESA;
   if (use & ~valid_mask) {
      _eglLog(_EGL_WARNING, "bad image use bit 0x%04x", use & ~valid_mask);
      err = EGL_BAD_PARAMETER;
      goto cleanup_img;
   }

   /* Translate the EGL use bits into their DRI counterparts. */
   dri_use = 0;
   if (use & EGL_DRM_BUFFER_USE_SHARE_MESA)
      dri_use |= __DRI_IMAGE_USE_SHARE;
   if (use & EGL_DRM_BUFFER_USE_SCANOUT_MESA)
      dri_use |= __DRI_IMAGE_USE_SCANOUT;

   dri2_img->dri_image =
      dri2_dpy->image->createImage(dri2_dpy->dri_screen,
                                   width, height, format, dri_use, dri2_img);
   if (dri2_img->dri_image == NULL) {
      err = EGL_BAD_ALLOC;
      goto cleanup_img;
   }

   return &dri2_img->base;

cleanup_img:
   free(dri2_img);
   _eglError(err, "dri2_create_drm_image_mesa");

   return EGL_NO_IMAGE_KHR;
}
/**
 * Driver hook for ExportDRMImageMESA: query the DRI image behind an EGL image
 * for its name, handle and stride.
 *
 * Each of name, handle and stride is optional; a NULL pointer skips that
 * query.  Only a failed name query is reported (EGL_BAD_ALLOC, EGL_FALSE);
 * the results of the handle and stride queries are not checked.
 */
static EGLBoolean
dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img,
                           EGLint *name, EGLint *handle, EGLint *stride)
{
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_image *dri2_img = dri2_egl_image(img);

   if (name != NULL) {
      if (!dri2_dpy->image->queryImage(dri2_img->dri_image,
                                       __DRI_IMAGE_ATTRIB_NAME, name)) {
         _eglError(EGL_BAD_ALLOC, "dri2_export_drm_image_mesa");
         return EGL_FALSE;
      }
   }

   if (handle != NULL) {
      dri2_dpy->image->queryImage(dri2_img->dri_image,
                                  __DRI_IMAGE_ATTRIB_HANDLE, handle);
   }

   if (stride != NULL) {
      dri2_dpy->image->queryImage(dri2_img->dri_image,
                                  __DRI_IMAGE_ATTRIB_STRIDE, stride);
   }

   return EGL_TRUE;
}
/**
* This is the main entrypoint into the driver, called by libEGL.
* Create a new _EGLDriver object and init its dispatch table.
@@ -1681,6 +1902,8 @@ _eglMain(const char *args)
dri2_drv->base.API.CreateImageKHR = dri2_create_image_khr;
dri2_drv->base.API.DestroyImageKHR = dri2_destroy_image_khr;
dri2_drv->base.API.SwapBuffersRegionNOK = dri2_swap_buffers_region;
dri2_drv->base.API.CreateDRMImageMESA = dri2_create_drm_image_mesa;
dri2_drv->base.API.ExportDRMImageMESA = dri2_export_drm_image_mesa;
dri2_drv->base.Name = "DRI2";
dri2_drv->base.Unload = dri2_unload;
+4 -2
View File
@@ -26,7 +26,8 @@ HEADERS = \
eglmutex.h \
eglscreen.h \
eglstring.h \
eglsurface.h
eglsurface.h \
eglsync.h
SOURCES = \
eglapi.c \
@@ -44,7 +45,8 @@ SOURCES = \
eglmode.c \
eglscreen.c \
eglstring.c \
eglsurface.c
eglsurface.c \
eglsync.c
OBJECTS = $(SOURCES:.c=.o)
+3 -1
View File
@@ -12,6 +12,7 @@ if env['platform'] != 'winddk':
'_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_WINDOWS',
'_EGL_DRIVER_SEARCH_DIR=\\"\\"',
'_EGL_OS_WINDOWS',
'_EGL_GET_CORE_ADDRESSES',
'KHRONOS_DLL_EXPORTS',
])
@@ -36,11 +37,12 @@ if env['platform'] != 'winddk':
'eglscreen.c',
'eglstring.c',
'eglsurface.c',
'eglsync.c',
]
egl = env.SharedLibrary(
target = 'libEGL',
source = egl_sources,
source = egl_sources + ['egl.def'],
)
env.InstallSharedLibrary(egl, version=(1, 4, 0))
+35
View File
@@ -0,0 +1,35 @@
EXPORTS
eglBindAPI
eglBindTexImage
eglChooseConfig
eglCopyBuffers
eglCreateContext
eglCreatePbufferFromClientBuffer
eglCreatePbufferSurface
eglCreatePixmapSurface
eglCreateWindowSurface
eglDestroyContext
eglDestroySurface
eglGetConfigAttrib
eglGetConfigs
eglGetCurrentContext
eglGetCurrentDisplay
eglGetCurrentSurface
eglGetDisplay
eglGetError
eglGetProcAddress
eglInitialize
eglMakeCurrent
eglQueryAPI
eglQueryContext
eglQueryString
eglQuerySurface
eglReleaseTexImage
eglReleaseThread
eglSurfaceAttrib
eglSwapBuffers
eglSwapInterval
eglTerminate
eglWaitClient
eglWaitGL
eglWaitNative
+188 -1
View File
@@ -68,6 +68,7 @@
#include "eglscreen.h"
#include "eglmode.h"
#include "eglimage.h"
#include "eglsync.h"
/**
@@ -126,6 +127,8 @@
#define _EGL_CHECK_MODE(disp, m, ret, drv) \
_EGL_CHECK_OBJECT(disp, Mode, m, ret, drv)
#define _EGL_CHECK_SYNC(disp, s, ret, drv) \
_EGL_CHECK_OBJECT(disp, Sync, s, ret, drv)
static INLINE _EGLDriver *
@@ -185,6 +188,26 @@ _eglCheckConfig(_EGLDisplay *disp, _EGLConfig *conf, const char *msg)
}
#ifdef EGL_KHR_reusable_sync
/**
 * Validate a display/sync pair for an API entry point.
 *
 * Returns the driver obtained from _eglCheckDisplay(), or NULL when the
 * display check fails or when the sync is NULL (in which case
 * EGL_BAD_PARAMETER is recorded with msg).
 */
static INLINE _EGLDriver *
_eglCheckSync(_EGLDisplay *disp, _EGLSync *s, const char *msg)
{
   _EGLDriver *driver = _eglCheckDisplay(disp, msg);

   /* The sync is only examined once the display itself checked out. */
   if (driver != NULL && s == NULL) {
      _eglError(EGL_BAD_PARAMETER, msg);
      driver = NULL;
   }

   return driver;
}
#endif /* EGL_KHR_reusable_sync */
#ifdef EGL_MESA_screen_surface
@@ -809,7 +832,44 @@ eglGetProcAddress(const char *procname)
const char *name;
_EGLProc function;
} egl_functions[] = {
/* extensions only */
/* core functions should not be queryable, but, well... */
#ifdef _EGL_GET_CORE_ADDRESSES
/* alphabetical order */
{ "eglBindAPI", (_EGLProc) eglBindAPI },
{ "eglBindTexImage", (_EGLProc) eglBindTexImage },
{ "eglChooseConfig", (_EGLProc) eglChooseConfig },
{ "eglCopyBuffers", (_EGLProc) eglCopyBuffers },
{ "eglCreateContext", (_EGLProc) eglCreateContext },
{ "eglCreatePbufferFromClientBuffer", (_EGLProc) eglCreatePbufferFromClientBuffer },
{ "eglCreatePbufferSurface", (_EGLProc) eglCreatePbufferSurface },
{ "eglCreatePixmapSurface", (_EGLProc) eglCreatePixmapSurface },
{ "eglCreateWindowSurface", (_EGLProc) eglCreateWindowSurface },
{ "eglDestroyContext", (_EGLProc) eglDestroyContext },
{ "eglDestroySurface", (_EGLProc) eglDestroySurface },
{ "eglGetConfigAttrib", (_EGLProc) eglGetConfigAttrib },
{ "eglGetConfigs", (_EGLProc) eglGetConfigs },
{ "eglGetCurrentContext", (_EGLProc) eglGetCurrentContext },
{ "eglGetCurrentDisplay", (_EGLProc) eglGetCurrentDisplay },
{ "eglGetCurrentSurface", (_EGLProc) eglGetCurrentSurface },
{ "eglGetDisplay", (_EGLProc) eglGetDisplay },
{ "eglGetError", (_EGLProc) eglGetError },
{ "eglGetProcAddress", (_EGLProc) eglGetProcAddress },
{ "eglInitialize", (_EGLProc) eglInitialize },
{ "eglMakeCurrent", (_EGLProc) eglMakeCurrent },
{ "eglQueryAPI", (_EGLProc) eglQueryAPI },
{ "eglQueryContext", (_EGLProc) eglQueryContext },
{ "eglQueryString", (_EGLProc) eglQueryString },
{ "eglQuerySurface", (_EGLProc) eglQuerySurface },
{ "eglReleaseTexImage", (_EGLProc) eglReleaseTexImage },
{ "eglReleaseThread", (_EGLProc) eglReleaseThread },
{ "eglSurfaceAttrib", (_EGLProc) eglSurfaceAttrib },
{ "eglSwapBuffers", (_EGLProc) eglSwapBuffers },
{ "eglSwapInterval", (_EGLProc) eglSwapInterval },
{ "eglTerminate", (_EGLProc) eglTerminate },
{ "eglWaitClient", (_EGLProc) eglWaitClient },
{ "eglWaitGL", (_EGLProc) eglWaitGL },
{ "eglWaitNative", (_EGLProc) eglWaitNative },
#endif /* _EGL_GET_CORE_ADDRESSES */
#ifdef EGL_MESA_screen_surface
{ "eglChooseModeMESA", (_EGLProc) eglChooseModeMESA },
{ "eglGetModesMESA", (_EGLProc) eglGetModesMESA },
@@ -833,6 +893,10 @@ eglGetProcAddress(const char *procname)
#endif /* EGL_KHR_image_base */
#ifdef EGL_NOK_swap_region
{ "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK },
#endif
#ifdef EGL_MESA_drm_image
{ "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA },
{ "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA },
#endif
{ NULL, NULL }
};
@@ -1245,6 +1309,90 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image)
#endif /* EGL_KHR_image_base */
#ifdef EGL_KHR_reusable_sync
/**
 * API entry point: create a sync object through the driver's CreateSyncKHR
 * hook and, if the driver returned one, link it to the display.
 *
 * Returns the handle produced by _eglLinkSync(), or EGL_NO_SYNC_KHR when the
 * driver returned NULL.
 */
EGLSyncKHR EGLAPIENTRY
eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLDriver *drv;
_EGLSync *sync;
EGLSyncKHR ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_NO_SYNC_KHR when the display check fails - confirm. */
_EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv);
sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list);
ret = (sync) ? _eglLinkSync(sync, disp) : EGL_NO_SYNC_KHR;
RETURN_EGL_EVAL(disp, ret);
}
/**
 * API entry point: destroy a sync object.
 *
 * The handle is resolved with _eglLookupSync(); the sync is unlinked from its
 * display before the driver's DestroySyncKHR hook is called, and the hook's
 * result is what gets returned.
 */
EGLBoolean EGLAPIENTRY
eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLSync *s = _eglLookupSync(sync, disp);
_EGLDriver *drv;
EGLBoolean ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_FALSE when the display or sync is invalid - confirm. */
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
_eglUnlinkSync(s);
ret = drv->API.DestroySyncKHR(drv, disp, s);
RETURN_EGL_EVAL(disp, ret);
}
/**
 * API entry point: wait on a sync object by forwarding to the driver's
 * ClientWaitSyncKHR hook.
 *
 * The handle is resolved with _eglLookupSync(); the hook's EGLint result is
 * returned.  EGL_FALSE is the value handed to the check macro for the
 * failure case.
 */
EGLint EGLAPIENTRY
eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLSync *s = _eglLookupSync(sync, disp);
_EGLDriver *drv;
EGLint ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_FALSE when the display or sync is invalid - confirm. */
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout);
RETURN_EGL_EVAL(disp, ret);
}
/**
 * API entry point: change the status of a sync object by forwarding mode to
 * the driver's SignalSyncKHR hook.
 *
 * The handle is resolved with _eglLookupSync(); the hook's result is
 * returned.
 */
EGLBoolean EGLAPIENTRY
eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLSync *s = _eglLookupSync(sync, disp);
_EGLDriver *drv;
EGLBoolean ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_FALSE when the display or sync is invalid - confirm. */
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
ret = drv->API.SignalSyncKHR(drv, disp, s, mode);
RETURN_EGL_EVAL(disp, ret);
}
/**
 * API entry point: query an attribute of a sync object by forwarding to the
 * driver's GetSyncAttribKHR hook, which writes the result through value.
 *
 * The handle is resolved with _eglLookupSync(); the hook's result is
 * returned.
 */
EGLBoolean EGLAPIENTRY
eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLSync *s = _eglLookupSync(sync, disp);
_EGLDriver *drv;
EGLBoolean ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_FALSE when the display or sync is invalid - confirm. */
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value);
RETURN_EGL_EVAL(disp, ret);
}
#endif /* EGL_KHR_reusable_sync */
#ifdef EGL_NOK_swap_region
EGLBoolean EGLAPIENTRY
@@ -1272,3 +1420,42 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
}
#endif /* EGL_NOK_swap_region */
#ifdef EGL_MESA_drm_image
/**
 * API entry point: create a DRM image through the driver's
 * CreateDRMImageMESA hook and, if the driver returned one, link it to the
 * display.
 *
 * Returns the handle produced by _eglLinkImage(), or EGL_NO_IMAGE_KHR when
 * the driver returned NULL.
 */
EGLImageKHR EGLAPIENTRY
eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLDriver *drv;
_EGLImage *img;
EGLImageKHR ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_NO_IMAGE_KHR when the display check fails - confirm. */
_EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
img = drv->API.CreateDRMImageMESA(drv, disp, attr_list);
ret = (img) ? _eglLinkImage(img, disp) : EGL_NO_IMAGE_KHR;
RETURN_EGL_EVAL(disp, ret);
}
/**
 * API entry point: export the name, handle and stride of an image by
 * forwarding to the driver's ExportDRMImageMESA hook.
 *
 * The handle is resolved with _eglLookupImage(); a handle that does not
 * resolve is reported as EGL_BAD_PARAMETER with EGL_FALSE.  Otherwise the
 * hook's result is returned.
 */
EGLBoolean EGLAPIENTRY
eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image,
EGLint *name, EGLint *handle, EGLint *stride)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLImage *img = _eglLookupImage(image, disp);
_EGLDriver *drv;
EGLBoolean ret;
/* NOTE(review): macro defined elsewhere; presumably sets drv and bails out
 * with EGL_FALSE when the display check fails - confirm. */
_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
/* The image is only examined after the display check. */
if (!img)
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);
ret = drv->API.ExportDRMImageMESA(drv, disp, img, name, handle, stride);
RETURN_EGL_EVAL(disp, ret);
}
#endif
+28
View File
@@ -76,10 +76,25 @@ typedef _EGLImage *(*CreateImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLCo
typedef EGLBoolean (*DestroyImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *image);
#endif /* EGL_KHR_image_base */
#ifdef EGL_KHR_reusable_sync
typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list);
typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTimeKHR timeout);
typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLenum mode);
typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLint *value);
#endif /* EGL_KHR_reusable_sync */
#ifdef EGL_NOK_swap_region
typedef EGLBoolean (*SwapBuffersRegionNOK_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint numRects, const EGLint *rects);
#endif
#ifdef EGL_MESA_drm_image
typedef _EGLImage *(*CreateDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, const EGLint *attr_list);
typedef EGLBoolean (*ExportDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *name, EGLint *handle, EGLint *stride);
#endif
/**
* The API dispatcher jumps through these functions
*/
@@ -138,9 +153,22 @@ struct _egl_api
DestroyImageKHR_t DestroyImageKHR;
#endif /* EGL_KHR_image_base */
#ifdef EGL_KHR_reusable_sync
CreateSyncKHR_t CreateSyncKHR;
DestroySyncKHR_t DestroySyncKHR;
ClientWaitSyncKHR_t ClientWaitSyncKHR;
SignalSyncKHR_t SignalSyncKHR;
GetSyncAttribKHR_t GetSyncAttribKHR;
#endif /* EGL_KHR_reusable_sync */
#ifdef EGL_NOK_swap_region
SwapBuffersRegionNOK_t SwapBuffersRegionNOK;
#endif
#ifdef EGL_MESA_drm_image
CreateDRMImageMESA_t CreateDRMImageMESA;
ExportDRMImageMESA_t ExportDRMImageMESA;
#endif
};
#endif /* EGLAPI_INCLUDED */
+5 -2
View File
@@ -460,11 +460,14 @@ _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria)
}
if (!matched) {
#ifdef DEBUG
#ifndef DEBUG
/* only print the common errors when DEBUG is not defined */
if (attr != EGL_RENDERABLE_TYPE)
break;
#endif
_eglLog(_EGL_DEBUG,
"the value (0x%x) of attribute 0x%04x did not meet the criteria (0x%x)",
val, attr, cmp);
#endif
break;
}
}
+11 -9
View File
@@ -83,15 +83,6 @@ _eglParseContextAttribList(_EGLContext *ctx, const EGLint *attrib_list)
}
}
if (err == EGL_SUCCESS && ctx->Config) {
EGLint renderable_type, api_bit;
renderable_type = GET_CONFIG_ATTRIB(ctx->Config, EGL_RENDERABLE_TYPE);
api_bit = _eglGetContextAPIBit(ctx);
if (!(renderable_type & api_bit))
err = EGL_BAD_CONFIG;
}
return err;
}
@@ -121,6 +112,17 @@ _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy, _EGLConfig *conf,
ctx->ClientVersion = 1; /* the default, per EGL spec */
err = _eglParseContextAttribList(ctx, attrib_list);
if (err == EGL_SUCCESS && ctx->Config) {
EGLint renderable_type, api_bit;
renderable_type = GET_CONFIG_ATTRIB(ctx->Config, EGL_RENDERABLE_TYPE);
api_bit = _eglGetContextAPIBit(ctx);
if (!(renderable_type & api_bit)) {
_eglLog(_EGL_DEBUG, "context api is 0x%x while config supports 0x%x",
api_bit, renderable_type);
err = EGL_BAD_CONFIG;
}
}
if (err != EGL_SUCCESS)
return _eglError(err, "eglCreateContext");
+8
View File
@@ -24,6 +24,7 @@ enum _egl_resource_type {
_EGL_RESOURCE_CONTEXT,
_EGL_RESOURCE_SURFACE,
_EGL_RESOURCE_IMAGE,
_EGL_RESOURCE_SYNC,
_EGL_NUM_RESOURCES
};
@@ -53,6 +54,8 @@ struct _egl_extensions
EGLBoolean MESA_screen_surface;
EGLBoolean MESA_copy_context;
EGLBoolean MESA_drm_display;
EGLBoolean MESA_drm_image;
EGLBoolean KHR_image_base;
EGLBoolean KHR_image_pixmap;
EGLBoolean KHR_vg_parent_image;
@@ -60,9 +63,14 @@ struct _egl_extensions
EGLBoolean KHR_gl_texture_cubemap_image;
EGLBoolean KHR_gl_texture_3D_image;
EGLBoolean KHR_gl_renderbuffer_image;
EGLBoolean KHR_reusable_sync;
EGLBoolean KHR_fence_sync;
EGLBoolean KHR_surfaceless_gles1;
EGLBoolean KHR_surfaceless_gles2;
EGLBoolean KHR_surfaceless_opengl;
EGLBoolean NOK_swap_region;
EGLBoolean NOK_texture_from_pixmap;
+9
View File
@@ -21,6 +21,7 @@
#include "eglstring.h"
#include "eglsurface.h"
#include "eglimage.h"
#include "eglsync.h"
#include "eglmutex.h"
#if defined(_EGL_OS_UNIX)
@@ -722,6 +723,14 @@ _eglInitDriverFallbacks(_EGLDriver *drv)
drv->API.CreateImageKHR = _eglCreateImageKHR;
drv->API.DestroyImageKHR = _eglDestroyImageKHR;
#endif /* EGL_KHR_image_base */
#ifdef EGL_KHR_reusable_sync
drv->API.CreateSyncKHR = _eglCreateSyncKHR;
drv->API.DestroySyncKHR = _eglDestroySyncKHR;
drv->API.ClientWaitSyncKHR = _eglClientWaitSyncKHR;
drv->API.SignalSyncKHR = _eglSignalSyncKHR;
drv->API.GetSyncAttribKHR = _eglGetSyncAttribKHR;
#endif /* EGL_KHR_reusable_sync */
}
+4
View File
@@ -85,6 +85,7 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy)
_EGL_CHECK_EXTENSION(MESA_screen_surface);
_EGL_CHECK_EXTENSION(MESA_copy_context);
_EGL_CHECK_EXTENSION(MESA_drm_display);
_EGL_CHECK_EXTENSION(MESA_drm_image);
_EGL_CHECK_EXTENSION(KHR_image_base);
_EGL_CHECK_EXTENSION(KHR_image_pixmap);
@@ -97,6 +98,9 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy)
_EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image);
_EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image);
_EGL_CHECK_EXTENSION(KHR_reusable_sync);
_EGL_CHECK_EXTENSION(KHR_fence_sync);
_EGL_CHECK_EXTENSION(KHR_surfaceless_gles1);
_EGL_CHECK_EXTENSION(KHR_surfaceless_gles2);
_EGL_CHECK_EXTENSION(KHR_surfaceless_opengl);
+128
View File
@@ -0,0 +1,128 @@
#include <string.h>
#include "eglsync.h"
#include "eglcurrent.h"
#include "egllog.h"
#ifdef EGL_KHR_reusable_sync
/**
 * Walk an EGL_NONE-terminated list of sync attribute/value pairs.
 *
 * No attribute is recognized yet, so a NULL or empty list yields EGL_SUCCESS
 * and the first attribute encountered is logged and rejected with
 * EGL_BAD_ATTRIBUTE.
 */
static EGLint
_eglParseSyncAttribList(_EGLSync *sync, const EGLint *attrib_list)
{
   EGLint status = EGL_SUCCESS;
   EGLint pos = 0;

   if (attrib_list == NULL)
      return EGL_SUCCESS;

   while (attrib_list[pos] != EGL_NONE) {
      const EGLint attr = attrib_list[pos];
      const EGLint val = attrib_list[pos + 1];

      switch (attr) {
      default:
         (void) val;
         status = EGL_BAD_ATTRIBUTE;
         break;
      }

      if (status != EGL_SUCCESS) {
         _eglLog(_EGL_DEBUG, "bad sync attribute 0x%04x", attr);
         break;
      }

      /* step over the attribute and its value */
      pos += 2;
   }

   return status;
}
/**
 * Initialize the base fields of a sync object.
 *
 * The type must be EGL_SYNC_REUSABLE_KHR or EGL_SYNC_FENCE_KHR with the
 * matching extension flag set on the display; anything else is rejected with
 * EGL_BAD_ATTRIBUTE before the sync is touched.  The sync is then zeroed,
 * given its defaults (unsignaled, prior-commands-complete condition) and the
 * attribute list is parsed.
 *
 * Returns EGL_TRUE on success, otherwise the result of _eglError().
 */
EGLBoolean
_eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type,
             const EGLint *attrib_list)
{
   const int reusable_ok =
      (type == EGL_SYNC_REUSABLE_KHR && dpy->Extensions.KHR_reusable_sync);
   const int fence_ok =
      (type == EGL_SYNC_FENCE_KHR && dpy->Extensions.KHR_fence_sync);
   EGLint status;

   if (!(reusable_ok || fence_ok))
      return _eglError(EGL_BAD_ATTRIBUTE, "eglCreateSyncKHR");

   memset(sync, 0, sizeof(*sync));

   sync->Type = type;
   sync->Resource.Display = dpy;
   sync->SyncCondition = EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR;
   sync->SyncStatus = EGL_UNSIGNALED_KHR;

   status = _eglParseSyncAttribList(sync, attrib_list);
   if (status == EGL_SUCCESS)
      return EGL_TRUE;

   return _eglError(status, "eglCreateSyncKHR");
}
/**
 * Fallback for the CreateSyncKHR driver hook: creates nothing and always
 * returns NULL.  All parameters are ignored and no EGL error is recorded
 * here.
 */
_EGLSync *
_eglCreateSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy,
EGLenum type, const EGLint *attrib_list)
{
return NULL;
}
/**
 * Fallback for the DestroySyncKHR driver hook: does nothing and always
 * returns EGL_TRUE.  The sync is not freed here.
 */
EGLBoolean
_eglDestroySyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)
{
return EGL_TRUE;
}
/**
 * Fallback for the ClientWaitSyncKHR driver hook: never waits and always
 * returns EGL_FALSE.  All parameters are ignored and no EGL error is
 * recorded here.
 */
EGLint
_eglClientWaitSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLint flags, EGLTimeKHR timeout)
{
return EGL_FALSE;
}
/**
 * Fallback for the SignalSyncKHR driver hook: leaves the sync untouched and
 * always returns EGL_FALSE.  No EGL error is recorded here.
 */
EGLBoolean
_eglSignalSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLenum mode)
{
return EGL_FALSE;
}
/**
 * Fallback for the GetSyncAttribKHR driver hook: report an attribute from the
 * fields of the base sync object.
 *
 * drv, dpy   unused here
 * sync       sync object to query
 * attribute  EGL_SYNC_TYPE_KHR, EGL_SYNC_STATUS_KHR or, for fence syncs only,
 *            EGL_SYNC_CONDITION_KHR
 * value      receives the attribute value; must not be NULL
 *
 * Returns EGL_TRUE on success.  A NULL value records EGL_BAD_PARAMETER; an
 * unknown attribute, or EGL_SYNC_CONDITION_KHR on a non-fence sync, records
 * EGL_BAD_ATTRIBUTE.  In those cases the result of _eglError() is returned
 * and *value is left untouched.
 */
EGLBoolean
_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
                     EGLint attribute, EGLint *value)
{
   /* Attribute the error to this entry point, not to eglGetConfigs. */
   if (!value)
      return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");

   switch (attribute) {
   case EGL_SYNC_TYPE_KHR:
      *value = sync->Type;
      break;
   case EGL_SYNC_STATUS_KHR:
      *value = sync->SyncStatus;
      break;
   case EGL_SYNC_CONDITION_KHR:
      /* The condition is only reported for fence syncs. */
      if (sync->Type != EGL_SYNC_FENCE_KHR)
         return _eglError(EGL_BAD_ATTRIBUTE, "eglGetSyncAttribKHR");
      *value = sync->SyncCondition;
      break;
   default:
      return _eglError(EGL_BAD_ATTRIBUTE, "eglGetSyncAttribKHR");
   }

   return EGL_TRUE;
}
#endif /* EGL_KHR_reusable_sync */
+120
View File
@@ -0,0 +1,120 @@
#ifndef EGLSYNC_INCLUDED
#define EGLSYNC_INCLUDED
#include "egltypedefs.h"
#include "egldisplay.h"
#ifdef EGL_KHR_reusable_sync
/**
 * "Base" class for device driver syncs.
 *
 * _eglInitSync() zeroes the structure and then fills in the fields below.
 */
struct _egl_sync
{
/* A sync is a display resource */
_EGLResource Resource;
/* Sync type handed to _eglInitSync(): EGL_SYNC_REUSABLE_KHR or
 * EGL_SYNC_FENCE_KHR; reported for EGL_SYNC_TYPE_KHR */
EGLenum Type;
/* Reported for EGL_SYNC_STATUS_KHR; initialized to EGL_UNSIGNALED_KHR */
EGLenum SyncStatus;
/* Reported for EGL_SYNC_CONDITION_KHR (fence syncs only); initialized to
 * EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR */
EGLenum SyncCondition;
};
PUBLIC EGLBoolean
_eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type,
const EGLint *attrib_list);
extern _EGLSync *
_eglCreateSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy,
EGLenum type, const EGLint *attrib_list);
extern EGLBoolean
_eglDestroySyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
extern EGLint
_eglClientWaitSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLint flags, EGLTimeKHR timeout);
extern EGLBoolean
_eglSignalSyncKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLenum mode);
extern EGLBoolean
_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
EGLint attribute, EGLint *value);
/**
 * Register a sync as a resource of the given display.
 * The returned handle is the sync pointer itself and may be handed to the
 * client as is.
 */
static INLINE EGLSyncKHR
_eglLinkSync(_EGLSync *sync, _EGLDisplay *dpy)
{
   EGLSyncKHR handle = (EGLSyncKHR) sync;

   _eglLinkResource(&sync->Resource, _EGL_RESOURCE_SYNC, dpy);

   return handle;
}
/**
 * Remove a linked sync from the resources of its display.
 */
static INLINE void
_eglUnlinkSync(_EGLSync *sync)
{
   _EGLResource *res = &sync->Resource;

   _eglUnlinkResource(res, _EGL_RESOURCE_SYNC);
}
/**
 * Translate a client handle back into the sync it refers to.
 * NULL is returned when there is no display or when the handle is not a sync
 * resource of that display.
 */
static INLINE _EGLSync *
_eglLookupSync(EGLSyncKHR handle, _EGLDisplay *dpy)
{
   _EGLSync *candidate = (_EGLSync *) handle;

   if (dpy && _eglCheckResource((void *) candidate, _EGL_RESOURCE_SYNC, dpy))
      return candidate;

   return NULL;
}
/**
 * Give the client handle of a sync: the sync pointer when it is linked,
 * EGL_NO_SYNC_KHR when it is NULL or not linked.
 */
static INLINE EGLSyncKHR
_eglGetSyncHandle(_EGLSync *sync)
{
   _EGLResource *res = (_EGLResource *) sync;

   if (!res)
      return EGL_NO_SYNC_KHR;

   if (!_eglIsResourceLinked(res))
      return EGL_NO_SYNC_KHR;

   return (EGLSyncKHR) sync;
}
/**
 * Tell whether a sync is currently linked to a display.
 *
 * A link counts as a reference held by the display (the display owns the
 * sync), so drivers should not destroy a sync while it is linked.
 * A NULL sync is reported as not linked.
 */
static INLINE EGLBoolean
_eglIsSyncLinked(_EGLSync *sync)
{
   _EGLResource *res = (_EGLResource *) sync;

   if (!res)
      return 0;

   /* normalize to 0/1, as the logical AND in a single expression would */
   return _eglIsResourceLinked(res) != 0;
}
#endif /* EGL_KHR_reusable_sync */
#endif /* EGLSYNC_INCLUDED */
+2
View File
@@ -32,6 +32,8 @@ typedef struct _egl_screen _EGLScreen;
typedef struct _egl_surface _EGLSurface;
typedef struct _egl_sync _EGLSync;
typedef struct _egl_thread_info _EGLThreadInfo;
#endif /* EGLTYPEDEFS_INCLUDED */
+20 -18
View File
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
LIBNAME = gallium
C_SOURCES = \
cso_cache/cso_context.c \
cso_cache/cso_cache.c \
cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
draw/draw_gs.c \
@@ -26,7 +26,6 @@ C_SOURCES = \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
draw/draw_pt.c \
draw/draw_pt_elts.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
draw/draw_pt_fetch_emit.c \
@@ -35,24 +34,24 @@ C_SOURCES = \
draw/draw_pt_post_vs.c \
draw/draw_pt_so_emit.c \
draw/draw_pt_util.c \
draw/draw_pt_varray.c \
draw/draw_pt_vcache.c \
draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
draw/draw_vs_varient.c \
draw/draw_vs_aos.c \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
draw/draw_vs_varient.c \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
os/os_misc.c \
os/os_stream.c \
os/os_stream_log.c \
os/os_stream_null.c \
os/os_stream_stdc.c \
os/os_stream_str.c \
os/os_stream_null.c \
os/os_time.c \
pipebuffer/pb_buffer_fenced.c \
pipebuffer/pb_buffer_malloc.c \
@@ -65,17 +64,16 @@ C_SOURCES = \
pipebuffer/pb_bufmgr_slab.c \
pipebuffer/pb_validate.c \
rbug/rbug_connection.c \
rbug/rbug_core.c \
rbug/rbug_texture.c \
rbug/rbug_context.c \
rbug/rbug_shader.c \
rbug/rbug_core.c \
rbug/rbug_demarshal.c \
rbug/rbug_texture.c \
rbug/rbug_shader.c \
rtasm/rtasm_cpu.c \
rtasm/rtasm_execmem.c \
rtasm/rtasm_x86sse.c \
rtasm/rtasm_ppc.c \
rtasm/rtasm_ppc_spe.c \
tgsi/tgsi_sanity.c \
rtasm/rtasm_x86sse.c \
tgsi/tgsi_build.c \
tgsi/tgsi_dump.c \
tgsi/tgsi_exec.c \
@@ -83,19 +81,22 @@ C_SOURCES = \
tgsi/tgsi_iterate.c \
tgsi/tgsi_parse.c \
tgsi/tgsi_ppc.c \
tgsi/tgsi_sanity.c \
tgsi/tgsi_scan.c \
tgsi/tgsi_sse2.c \
tgsi/tgsi_text.c \
tgsi/tgsi_transform.c \
tgsi/tgsi_ureg.c \
tgsi/tgsi_util.c \
translate/translate_generic.c \
translate/translate_sse.c \
translate/translate.c \
translate/translate_cache.c \
translate/translate_generic.c \
translate/translate_sse.c \
util/u_debug.c \
util/u_debug_symbol.c \
util/u_debug_describe.c \
util/u_debug_refcnt.c \
util/u_debug_stack.c \
util/u_debug_symbol.c \
util/u_dump_defines.c \
util/u_dump_state.c \
util/u_bitmask.c \
@@ -118,10 +119,11 @@ C_SOURCES = \
util/u_gen_mipmap.c \
util/u_half.c \
util/u_handle_table.c \
util/u_hash_table.c \
util/u_hash.c \
util/u_hash_table.c \
util/u_keymap.c \
util/u_linear.c \
util/u_linkage.c \
util/u_network.c \
util/u_math.c \
util/u_mempool.c \
@@ -172,10 +174,10 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
draw/draw_vs_llvm.c \
draw/draw_pt_fetch_shade_pipeline_llvm.c \
draw/draw_llvm_sample.c \
draw/draw_llvm_translate.c \
draw/draw_llvm_sample.c
draw/draw_vs_llvm.c \
draw/draw_pt_fetch_shade_pipeline_llvm.c
GALLIVM_CPP_SOURCES = \
gallivm/lp_bld_misc.cpp
+22 -19
View File
@@ -50,10 +50,11 @@ env.Depends('util/u_format_table.c', [
])
source = [
'cso_cache/cso_context.c',
'cso_cache/cso_cache.c',
'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
'draw/draw_pipe_aapoint.c',
@@ -71,7 +72,6 @@ source = [
'draw/draw_pipe_wide_line.c',
'draw/draw_pipe_wide_point.c',
'draw/draw_pt.c',
'draw/draw_pt_elts.c',
'draw/draw_pt_emit.c',
'draw/draw_pt_fetch.c',
'draw/draw_pt_fetch_emit.c',
@@ -80,8 +80,7 @@ source = [
'draw/draw_pt_post_vs.c',
'draw/draw_pt_so_emit.c',
'draw/draw_pt_util.c',
'draw/draw_pt_varray.c',
'draw/draw_pt_vcache.c',
'draw/draw_pt_vsplit.c',
'draw/draw_vertex.c',
'draw/draw_vs.c',
'draw/draw_vs_aos.c',
@@ -91,16 +90,16 @@ source = [
'draw/draw_vs_ppc.c',
'draw/draw_vs_sse.c',
'draw/draw_vs_varient.c',
'draw/draw_gs.c',
#'indices/u_indices.c',
#'indices/u_unfilled_indices.c',
'indices/u_indices_gen.c',
'indices/u_unfilled_gen.c',
'os/os_misc.c',
'os/os_stream.c',
'os/os_stream_log.c',
'os/os_stream_null.c',
'os/os_stream_stdc.c',
'os/os_stream_str.c',
'os/os_stream_null.c',
'os/os_time.c',
'pipebuffer/pb_buffer_fenced.c',
'pipebuffer/pb_buffer_malloc.c',
@@ -112,35 +111,35 @@ source = [
'pipebuffer/pb_bufmgr_pool.c',
'pipebuffer/pb_bufmgr_slab.c',
'pipebuffer/pb_validate.c',
'rbug/rbug_core.c',
'rbug/rbug_shader.c',
'rbug/rbug_context.c',
'rbug/rbug_texture.c',
'rbug/rbug_demarshal.c',
'rbug/rbug_connection.c',
'rbug/rbug_context.c',
'rbug/rbug_core.c',
'rbug/rbug_demarshal.c',
'rbug/rbug_shader.c',
'rbug/rbug_texture.c',
'rtasm/rtasm_cpu.c',
'rtasm/rtasm_execmem.c',
'rtasm/rtasm_x86sse.c',
'rtasm/rtasm_ppc.c',
'rtasm/rtasm_ppc_spe.c',
'rtasm/rtasm_x86sse.c',
'tgsi/tgsi_build.c',
'tgsi/tgsi_dump.c',
'tgsi/tgsi_exec.c',
'tgsi/tgsi_info.c',
'tgsi/tgsi_iterate.c',
'tgsi/tgsi_parse.c',
'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sanity.c',
'tgsi/tgsi_scan.c',
'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sse2.c',
'tgsi/tgsi_text.c',
'tgsi/tgsi_transform.c',
'tgsi/tgsi_ureg.c',
'tgsi/tgsi_util.c',
'translate/translate_generic.c',
'translate/translate_sse.c',
'translate/translate.c',
'translate/translate_cache.c',
'translate/translate_generic.c',
'translate/translate_sse.c',
'util/u_bitmask.c',
'util/u_blit.c',
'util/u_blitter.c',
@@ -148,7 +147,9 @@ source = [
'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
'util/u_debug_describe.c',
'util/u_debug_memory.c',
'util/u_debug_refcnt.c',
'util/u_debug_stack.c',
'util/u_debug_symbol.c',
'util/u_dump_defines.c',
@@ -170,6 +171,8 @@ source = [
'util/u_hash.c',
'util/u_hash_table.c',
'util/u_keymap.c',
'util/u_linear.c',
'util/u_linkage.c',
'util/u_network.c',
'util/u_math.c',
'util/u_mempool.c',
@@ -208,9 +211,9 @@ if env['llvm']:
'gallivm/lp_bld_format_soa.c',
'gallivm/lp_bld_format_yuv.c',
'gallivm/lp_bld_gather.c',
'gallivm/lp_bld_init.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
'gallivm/lp_bld_init.c',
'gallivm/lp_bld_misc.cpp',
'gallivm/lp_bld_pack.c',
'gallivm/lp_bld_printf.c',
@@ -222,10 +225,10 @@ if env['llvm']:
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
'draw/draw_pt_fetch_shade_pipeline_llvm.c',
'draw/draw_llvm_sample.c',
'draw/draw_llvm_translate.c',
'draw/draw_vs_llvm.c',
'draw/draw_llvm_sample.c'
'draw/draw_pt_fetch_shade_pipeline_llvm.c',
'draw/draw_vs_llvm.c'
]
gallium = env.ConvenienceLibrary(
@@ -0,0 +1,114 @@
/**************************************************************************
*
* Copyright 2010, VMware, inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
 * Template body for the post-vertex-shader clip test.
 *
 * The including file defines TAG (name mangling) and FLAGS (a constant
 * combination of the DO_* bits).  Every vertex in \p info is visited in
 * turn and, depending on FLAGS:
 *  - its position is saved in clip[] and tested against the fixed
 *    x/y/z planes and any user planes (index 6 and up), producing
 *    the vertex clipmask;
 *  - if no plane was crossed, the position is divided by w and mapped
 *    through the viewport scale/translate;
 *  - the edge flag is read from the shader's edgeflag output.
 *
 * \return TRUE if any vertex has a non-zero clipmask or a cleared edge
 *         flag.
 */
static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
                                 struct draw_vertex_info *info )
{
   const unsigned flags = (FLAGS);
   const float *vp_scale = pvs->draw->viewport.scale;
   const float *vp_trans = pvs->draw->viewport.translate;
   /* const */ float (*planes)[4] = pvs->draw->plane;
   const unsigned pos_slot = draw_current_shader_position_output(pvs->draw);
   const unsigned ef_slot = pvs->draw->vs.edgeflag_output;
   const unsigned num_planes = pvs->draw->nr_planes;
   struct vertex_header *vert = info->verts;
   unsigned pipeline_bits = 0;
   unsigned v;

   for (v = 0; v < info->count; v++) {
      float *p = vert->data[pos_slot];
      unsigned crossed = 0x0;
      unsigned c;

      initialize_vertex_header(vert);

      if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
         /* Keep the untransformed clip-space position around. */
         for (c = 0; c < 4; c++)
            vert->clip[c] = p[c];

         /* Hardwired x/y planes: bits 0..3. */
         if (flags & DO_CLIP_XY) {
            if (-p[0] + p[3] < 0) crossed |= (1 << 0);
            if ( p[0] + p[3] < 0) crossed |= (1 << 1);
            if (-p[1] + p[3] < 0) crossed |= (1 << 2);
            if ( p[1] + p[3] < 0) crossed |= (1 << 3);
         }

         /* Z planes: bits 4 and 5.  Full-cube and half-cube modes only
          * differ in the bit 4 test; full-cube takes precedence when
          * both flags are set.
          */
         if (flags & (DO_CLIP_FULL_Z | DO_CLIP_HALF_Z)) {
            int outside_bit4;

            if (flags & DO_CLIP_FULL_Z)
               outside_bit4 = (p[2] + p[3] < 0);
            else
               outside_bit4 = (p[2] < 0);

            if (outside_bit4)
               crossed |= (1 << 4);
            if (-p[2] + p[3] < 0)
               crossed |= (1 << 5);
         }

         /* User planes follow the six fixed ones. */
         if (flags & DO_CLIP_USER) {
            unsigned pl;

            for (pl = 6; pl < num_planes; pl++) {
               if (dot4(p, planes[pl]) < 0)
                  crossed |= (1 << pl);
            }
         }

         /* Accumulate the value as stored in the vertex header. */
         vert->clipmask = crossed;
         pipeline_bits |= vert->clipmask;
      }

      if ((flags & DO_VIEWPORT) && crossed == 0) {
         /* divide by w */
         const float rhw = 1.0f / p[3];

         /* Viewport mapping */
         for (c = 0; c < 3; c++)
            p[c] = p[c] * rhw * vp_scale[c] + vp_trans[c];
         p[3] = rhw;
      }

      if ((flags & DO_EDGEFLAG) && ef_slot) {
         const float *ef_value = vert->data[ef_slot];

         /* The flag is set only for an output of exactly 1.0. */
         vert->edgeflag = (ef_value[0] == 1.0f);
         pipeline_bits |= !vert->edgeflag;
      }

      /* Vertices are packed info->stride bytes apart. */
      vert = (struct vertex_header *)( (char *)vert + info->stride );
   }

   return pipeline_bits != 0;
}
#undef FLAGS
#undef TAG
+70 -45
View File
@@ -34,6 +34,7 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
@@ -41,6 +42,25 @@
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
#include "draw_llvm.h"
/**
 * Query, once, whether the draw module should use its LLVM path.
 *
 * The answer comes from the DRAW_USE_LLVM debug option (default TRUE)
 * and is cached in function-local statics.  When PIPE_ARCH_X86 is
 * defined the answer is forced to FALSE on CPUs without SSE2.
 */
static boolean
draw_get_option_use_llvm(void)
{
   static boolean need_query = TRUE;
   static boolean use_llvm;

   /* Already decided on an earlier call. */
   if (!need_query)
      return use_llvm;

   need_query = FALSE;
   use_llvm = debug_get_bool_option("DRAW_USE_LLVM", TRUE);

#ifdef PIPE_ARCH_X86
   util_cpu_detect();
   /* require SSE2 due to LLVM PR6960. */
   if (!util_cpu_caps.has_sse2)
      use_llvm = FALSE;
#endif

   return use_llvm;
}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
lp_build_init();
assert(lp_build_engine);
draw->engine = lp_build_engine;
draw->llvm = draw_llvm_create(draw);
if(draw_get_option_use_llvm())
{
lp_build_init();
assert(lp_build_engine);
draw->engine = lp_build_engine;
draw->llvm = draw_llvm_create(draw);
}
#endif
if (!draw_init(draw))
@@ -83,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
draw->clip_xy = 1;
draw->clip_z = 1;
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -135,7 +160,8 @@ void draw_destroy( struct draw_context *draw )
draw_vs_destroy( draw );
draw_gs_destroy( draw );
#ifdef HAVE_LLVM
draw_llvm_destroy( draw->llvm );
if(draw->llvm)
draw_llvm_destroy( draw->llvm );
#endif
FREE( draw );
@@ -162,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
}
/**
 * Recompute the clip_xy / clip_z / clip_user flags from the driver
 * bypass settings, the depth_clamp state and the current plane count.
 */
static void update_clip_flags( struct draw_context *draw )
{
   /* Anything past the six fixed planes is a user clip plane. */
   draw->clip_user = (draw->nr_planes > 6);

   /* Z clipping is dropped if the driver bypasses it or depth clamp is on. */
   draw->clip_z = !(draw->driver.bypass_clip_z ||
                    draw->depth_clamp);

   draw->clip_xy = !draw->driver.bypass_clip_xy;
}
/**
* Register new primitive rasterization/rendering state.
* This causes the drawing pipeline to be rebuilt.
@@ -176,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw->rasterizer = raster;
draw->rast_handle = rast_handle;
draw->bypass_clipping = draw->driver.bypass_clipping;
}
}
}
/* With a little more work, llvmpipe will be able to turn this off and
* do its own x/y clipping.
*
* Some hardware can turn off clipping altogether - in particular any
* hardware with a TNL unit can do its own clipping, even if it is
* relying on the draw module for some other reason.
*/
void draw_set_driver_clipping( struct draw_context *draw,
boolean bypass_clipping )
boolean bypass_clip_xy,
boolean bypass_clip_z )
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->driver.bypass_clipping = bypass_clipping;
draw->bypass_clipping = draw->driver.bypass_clipping;
draw->driver.bypass_clip_xy = bypass_clip_xy;
draw->driver.bypass_clip_z = bypass_clip_z;
update_clip_flags(draw);
}
@@ -217,6 +258,8 @@ void draw_set_clip_state( struct draw_context *draw,
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
draw->depth_clamp = clip->depth_clamp;
update_clip_flags(draw);
}
@@ -472,47 +515,28 @@ void draw_set_render( struct draw_context *draw,
}
/**
 * Record the index buffer state to use for indexed draws.
 *
 * \param draw  the drawing context
 * \param ib    state to copy into the context, or NULL to zero the
 *              stored state
 */
void
draw_set_index_buffer(struct draw_context *draw,
                      const struct pipe_index_buffer *ib)
{
   if (!ib) {
      /* No index buffer: reset the stored copy. */
      memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
      return;
   }

   memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
}
/**
* Tell the drawing context about the index/element buffer to use
* (ala glDrawElements)
* If no element buffer is to be used (i.e. glDrawArrays) then this
* should be called with eltSize=0 and elements=NULL.
*
* \param draw the drawing context
* \param eltSize size of each element (1, 2 or 4 bytes)
* \param elements the element buffer ptr
* Tell drawing context where to find mapped index/element buffer.
*/
void
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
int eltBias,
unsigned min_index,
unsigned max_index,
const void *elements )
draw_set_mapped_index_buffer(struct draw_context *draw,
const void *elements)
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
draw->pt.user.eltBias = eltBias;
draw->pt.user.min_index = min_index;
draw->pt.user.max_index = max_index;
draw->pt.user.elts = elements;
}
void
draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize,
int eltBias,
const void *elements )
{
draw->pt.user.elts = elements;
draw->pt.user.eltSize = eltSize;
draw->pt.user.eltBias = eltBias;
draw->pt.user.min_index = 0;
draw->pt.user.max_index = 0xffffffff;
}
/* Revamp me please:
*/
void draw_do_flush( struct draw_context *draw, unsigned flags )
@@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw,
const void *data[DRAW_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
draw_llvm_set_mapped_texture(draw,
if(draw->llvm)
draw_llvm_set_mapped_texture(draw,
sampler_idx,
width, height, depth, last_level,
row_stride, img_stride, data);
+9 -12
View File
@@ -160,18 +160,11 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
void
draw_set_mapped_element_buffer_range( struct draw_context *draw,
unsigned eltSize,
int eltBias,
unsigned min_index,
unsigned max_index,
const void *elements );
void draw_set_index_buffer(struct draw_context *draw,
const struct pipe_index_buffer *ib);
void draw_set_mapped_element_buffer( struct draw_context *draw,
unsigned eltSize,
int eltBias,
const void *elements );
void draw_set_mapped_index_buffer(struct draw_context *draw,
const void *elements);
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
@@ -196,6 +189,9 @@ draw_set_so_state(struct draw_context *draw,
* draw_pt.c
*/
void draw_vbo(struct draw_context *draw,
const struct pipe_draw_info *info);
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
@@ -216,7 +212,8 @@ void draw_set_render( struct draw_context *draw,
struct vbuf_render *render );
void draw_set_driver_clipping( struct draw_context *draw,
boolean bypass_clipping );
boolean bypass_clip_xy,
boolean bypass_clip_z );
void draw_set_force_passthrough( struct draw_context *draw,
boolean enable );
+16 -10
View File
@@ -54,10 +54,10 @@ FUNC(FUNC_VARS)
FUNC_ENTER;
/* prim, count, and last_vertex_last should have been defined */
/* prim, prim_flags, count, and last_vertex_last should have been defined */
if (0) {
debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
__FUNCTION__, prim, count, last_vertex_last);
debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
__FUNCTION__, prim, prim_flags, count, last_vertex_last);
}
switch (prim) {
@@ -80,7 +80,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_LOOP:
case PIPE_PRIM_LINE_STRIP:
if (count >= 2) {
flags = DRAW_PIPE_RESET_STIPPLE;
flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = idx[1];
@@ -90,7 +90,7 @@ FUNC(FUNC_VARS)
LINE(flags, idx[0], idx[1]);
}
/* close the loop */
if (prim == PIPE_PRIM_LINE_LOOP)
if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
LINE(flags, idx[1], idx[2]);
}
break;
@@ -255,17 +255,23 @@ FUNC(FUNC_VARS)
if (last_vertex_last) {
flags = (DRAW_PIPE_RESET_STIPPLE |
DRAW_PIPE_EDGE_FLAG_2 |
DRAW_PIPE_EDGE_FLAG_0);
if (!(prim_flags & DRAW_SPLIT_BEFORE))
flags |= DRAW_PIPE_EDGE_FLAG_2;
edge_next = DRAW_PIPE_EDGE_FLAG_0;
edge_finish = DRAW_PIPE_EDGE_FLAG_1;
edge_finish =
(prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
}
else {
flags = (DRAW_PIPE_RESET_STIPPLE |
DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_1);
if (!(prim_flags & DRAW_SPLIT_BEFORE))
flags |= DRAW_PIPE_EDGE_FLAG_0;
edge_next = DRAW_PIPE_EDGE_FLAG_1;
edge_finish = DRAW_PIPE_EDGE_FLAG_2;
edge_finish =
(prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
}
idx[0] = GET_ELT(0);
@@ -300,7 +306,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
if (count >= 4) {
flags = DRAW_PIPE_RESET_STIPPLE;
flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = GET_ELT(1);
idx[3] = GET_ELT(2);
+2 -1
View File
@@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
#define FUNC gs_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
#define GET_ELT(idx) (elts[idx])
#include "draw_gs_tmp.h"
@@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
output_prims->start = 0;
output_prims->count = shader->emitted_vertices;
output_prims->prim = shader->output_primitive;
output_prims->flags = 0x0;
output_prims->primitive_lengths = shader->primitive_lengths;
output_prims->primitive_count = shader->emitted_primitives;
output_verts->count = shader->emitted_vertices;
+2 -4
View File
@@ -6,12 +6,10 @@
#define FUNC_ENTER \
/* declare more local vars */ \
struct draw_context *draw = gs->draw; \
const unsigned prim = input_prims->prim; \
const unsigned prim_flags = input_prims->flags; \
const unsigned count = input_prims->count; \
const boolean last_vertex_last = \
!(draw->rasterizer->flatshade && \
draw->rasterizer->flatshade_first); \
const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
+42 -32
View File
@@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw)
{
struct draw_llvm *llvm;
#ifdef PIPE_ARCH_X86
util_cpu_detect();
/* require SSE2 due to LLVM PR6960. */
if (!util_cpu_caps.has_sse2)
return NULL;
#endif
llvm = CALLOC_STRUCT( draw_llvm );
if (!llvm)
return NULL;
@@ -292,15 +285,23 @@ draw_llvm_destroy(struct draw_llvm *llvm)
}
struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
draw_llvm_create_variant(struct draw_llvm *llvm,
unsigned num_inputs,
const struct draw_llvm_variant_key *key)
{
struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
struct draw_llvm_variant *variant;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(llvm->draw->vs.vertex_shader);
variant = MALLOC(sizeof *variant +
shader->variant_key_size -
sizeof variant->key);
if (variant == NULL)
return NULL;
variant->llvm = llvm;
draw_llvm_make_variant_key(llvm, &variant->key);
memcpy(&variant->key, key, shader->variant_key_size);
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
@@ -738,8 +739,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
context_ptr);
sampler = draw_llvm_sampler_soa_create(
draw_llvm_variant_key_samplers(&variant->key),
context_ptr);
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
@@ -901,8 +903,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
context_ptr);
sampler = draw_llvm_sampler_soa_create(
draw_llvm_variant_key_samplers(&variant->key),
context_ptr);
fetch_max = LLVMBuildSub(builder, fetch_count,
LLVMConstInt(LLVMInt32Type(), 1, 0),
@@ -1002,35 +1005,42 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_func_delete_body(variant->function_elts);
}
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key)
struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
{
unsigned i;
struct draw_llvm_variant_key *key;
struct lp_sampler_static_state *sampler;
memset(key, 0, sizeof(struct draw_llvm_variant_key));
key = (struct draw_llvm_variant_key *)store;
/* Presumably all variants of the shader should have the same
* number of vertex elements - ie the number of shader inputs.
*/
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
/* All variants of this shader will have the same value for
* nr_samplers. Not yet trying to compact away holes in the
* sampler array.
*/
key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
sampler = draw_llvm_variant_key_samplers(key);
memcpy(key->vertex_element,
llvm->draw->pt.vertex_element,
sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
memset(sampler, 0, key->nr_samplers * sizeof *sampler);
memcpy(&key->vs,
&llvm->draw->vs.vertex_shader->state,
sizeof(struct pipe_shader_state));
/* if the driver implemented the sampling hooks then
* setup our sampling state */
if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) {
for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
lp_sampler_static_state(&key->sampler[i],
llvm->draw->sampler_views[i],
llvm->draw->samplers[i]);
}
for (i = 0 ; i < key->nr_samplers; i++) {
lp_sampler_static_state(&sampler[i],
llvm->draw->sampler_views[i],
llvm->draw->samplers[i]);
}
return key;
}
void
+45 -9
View File
@@ -151,12 +151,43 @@ typedef void
struct draw_llvm_variant_key
{
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
struct pipe_shader_state vs;
struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS];
unsigned nr_vertex_elements:16;
unsigned nr_samplers:16;
/* Variable number of vertex elements:
*/
struct pipe_vertex_element vertex_element[1];
/* Followed by variable number of samplers:
*/
/* struct lp_sampler_static_state sampler; */
};
#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
(sizeof(struct draw_llvm_variant_key) + \
PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state) + \
(PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
/**
 * Size in bytes of a variant key holding the given number of vertex
 * elements and sampler states.
 *
 * struct draw_llvm_variant_key already contains one vertex element, so
 * that one is subtracted from the fixed part.  All arithmetic is done in
 * size_t: computing (nr_vertex_elements - 1) in unsigned would wrap to
 * UINT_MAX for zero elements and, where size_t is wider than unsigned,
 * yield a huge size instead of "one element less".
 *
 * \param nr_vertex_elements  number of entries in key->vertex_element[]
 * \param nr_samplers         number of sampler states stored after them
 */
static INLINE size_t
draw_llvm_variant_key_size(unsigned nr_vertex_elements,
                           unsigned nr_samplers)
{
   /* Header without the single built-in vertex_element[1] slot. */
   const size_t fixed_size = sizeof(struct draw_llvm_variant_key) -
                             sizeof(struct pipe_vertex_element);

   return (fixed_size +
           (size_t) nr_samplers * sizeof(struct lp_sampler_static_state) +
           (size_t) nr_vertex_elements * sizeof(struct pipe_vertex_element));
}
/**
 * Return the address just past the key's nr_vertex_elements vertex
 * elements, viewed as an array of sampler static state.
 */
static INLINE struct lp_sampler_static_state *
draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
{
   void *past_elements = key->vertex_element + key->nr_vertex_elements;

   return (struct lp_sampler_static_state *) past_elements;
}
struct draw_llvm_variant_list_item
{
struct draw_llvm_variant *base;
@@ -165,7 +196,6 @@ struct draw_llvm_variant_list_item
struct draw_llvm_variant
{
struct draw_llvm_variant_key key;
LLVMValueRef function;
LLVMValueRef function_elts;
draw_jit_vert_func jit_func;
@@ -176,11 +206,16 @@ struct draw_llvm_variant
struct draw_llvm *llvm;
struct draw_llvm_variant_list_item list_item_global;
struct draw_llvm_variant_list_item list_item_local;
/* key is variable-sized, must be last */
struct draw_llvm_variant_key key;
/* key is variable-sized, must be last */
};
struct llvm_vertex_shader {
struct draw_vertex_shader base;
unsigned variant_key_size;
struct draw_llvm_variant_list_item variants;
unsigned variants_created;
unsigned variants_cached;
@@ -220,14 +255,15 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs);
draw_llvm_create_variant(struct draw_llvm *llvm,
unsigned num_vertex_header_attribs,
const struct draw_llvm_variant_key *key);
void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key);
struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
+16 -16
View File
@@ -169,35 +169,27 @@ static void do_triangle( struct draw_context *draw,
/*
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
*
* Flags are needed by the stipple and unfilled stages. When the two stages
* are active, vcache_run_extras is called and the flags are stored in the
* higher bits of i0. Otherwise, flags do not matter.
*/
#define TRIANGLE(flags,i0,i1,i2) \
do { \
assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
i0, /* flags */ \
verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
flags, \
verts + stride * (i0), \
verts + stride * (i1), \
verts + stride * (i2) ); \
} while (0)
#define LINE(flags,i0,i1) \
do { \
assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
do_line( draw, \
i0, /* flags */ \
verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
flags, \
verts + stride * (i0), \
verts + stride * (i1) ); \
} while (0)
#define POINT(i0) \
do { \
assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \
do_point( draw, verts + stride * (i0) ); \
} while (0)
@@ -207,6 +199,7 @@ static void do_triangle( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
const ushort *elts, \
@@ -245,22 +238,27 @@ void draw_pipeline_run( struct draw_context *draw,
const unsigned count = prim_info->primitive_lengths[i];
#if DEBUG
/* make sure none of the element indexes go outside the vertex buffer */
/* Warn if any of the element indexes goes outside the vertex buffer */
{
unsigned max_index = 0x0, i;
/* find the largest element index */
for (i = 0; i < count; i++) {
unsigned int index = (prim_info->elts[start + i]
& ~DRAW_PIPE_FLAG_MASK);
unsigned int index = prim_info->elts[start + i];
if (index > max_index)
max_index = index;
}
assert(max_index <= vert_info->count);
if (max_index >= vert_info->count) {
debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
__FUNCTION__,
max_index,
vert_info->count);
}
}
#endif
pipe_run_elts(draw,
prim_info->prim,
prim_info->flags,
vert_info->verts,
vert_info->stride,
prim_info->elts + start,
@@ -298,6 +296,7 @@ void draw_pipeline_run( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
unsigned count
@@ -330,6 +329,7 @@ void draw_pipeline_run_linear( struct draw_context *draw,
pipe_run_linear(draw,
prim_info->prim,
prim_info->flags,
(struct vertex_header*)verts,
vert_info->stride,
count);
@@ -265,7 +265,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
/* Clip stage
*/
if (!draw->bypass_clipping)
if (draw->clip_xy || draw->clip_z || draw->clip_user)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
@@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
/* even number */
vbuf->max_vertices = vbuf->max_vertices & ~1;
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
+25 -19
View File
@@ -140,8 +140,7 @@ struct draw_context
} middle;
struct {
struct draw_pt_front_end *vcache;
struct draw_pt_front_end *varray;
struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -150,6 +149,8 @@ struct draw_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
struct pipe_index_buffer index_buffer;
/* user-space vertex data, buffers */
struct {
/** vertex element/index buffer (ex: glDrawElements) */
@@ -175,13 +176,19 @@ struct draw_context
} pt;
struct {
boolean bypass_clipping;
boolean bypass_vs;
boolean bypass_clip_xy;
boolean bypass_clip_z;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
/* Flags set if API requires clipping in these planes and the
* driver doesn't indicate that it can do it for us.
*/
boolean clip_xy;
boolean clip_z;
boolean clip_user;
boolean force_passthrough; /**< never clip or shade */
@@ -296,6 +303,10 @@ struct draw_vertex_info {
unsigned count;
};
/* these flags are set if the primitive is a segment of a larger one */
#define DRAW_SPLIT_BEFORE 0x1
#define DRAW_SPLIT_AFTER 0x2
struct draw_prim_info {
boolean linear;
unsigned start;
@@ -304,6 +315,7 @@ struct draw_prim_info {
unsigned count;
unsigned prim;
unsigned flags;
unsigned *primitive_lengths;
unsigned primitive_count;
};
@@ -369,21 +381,15 @@ void draw_pipeline_destroy( struct draw_context *draw );
/* We use the top few bits in the elts[] parameter to convey a little
* API information. This limits the number of vertices we can address
* to only 4096 -- if that becomes a problem, we can switch to 32-bit
* draw indices.
*
* These flags expected at first vertex of lines & triangles when
* unfilled and/or line stipple modes are operational.
/*
* These flags are used by the pipeline when unfilled and/or line stipple modes
* are operational.
*/
#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
#define DRAW_PIPE_FLAG_MASK (0xf<<12)
#define DRAW_PIPE_EDGE_FLAG_0 0x1
#define DRAW_PIPE_EDGE_FLAG_1 0x2
#define DRAW_PIPE_EDGE_FLAG_2 0x4
#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
const struct draw_vertex_info *vert,
+73 -66
View File
@@ -39,25 +39,14 @@
#include "util/u_math.h"
#include "util/u_prim.h"
#include "util/u_format.h"
#include "util/u_draw.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
#ifdef HAVE_LLVM
DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
#endif
/**
 * Trim a vertex count so that it describes only whole primitives.
 *
 * \param count  number of vertices supplied
 * \param first  vertices needed for the first primitive
 * \param incr   vertices needed for each further primitive (non-zero)
 * \return 0 if there are fewer than \p first vertices, otherwise
 *         \p count minus the leftover (count - first) % incr.
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   unsigned leftover;

   /* Not enough vertices for even one primitive. */
   if (first > count)
      return 0;

   leftover = (count - first) % incr;
   return count - leftover;
}
/* Overall we split things into:
* - frontend -- prepare fetch_elts, draw_elts - eg vcache
* - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -77,7 +66,7 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
count = trim(count, first, incr);
count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
@@ -97,7 +86,9 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
if (!draw->bypass_clipping && !draw->pt.test_fse) {
if ((draw->clip_xy ||
draw->clip_z ||
draw->clip_user) && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@@ -115,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
/* Pick the right frontend
*/
if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
} else {
frontend = draw->pt.front.varray;
}
frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
frontend->run(frontend,
draw_pt_elt_func(draw),
draw_pt_elt_ptr(draw, start),
draw->pt.user.eltBias,
count);
frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -143,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
return FALSE;
draw->pt.front.varray = draw_pt_varray(draw);
if (!draw->pt.front.varray)
draw->pt.front.vsplit = draw_pt_vsplit(draw);
if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -164,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
if (debug_get_option_draw_use_llvm())
if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -194,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
}
if (draw->pt.front.varray) {
draw->pt.front.varray->destroy( draw->pt.front.varray );
draw->pt.front.varray = NULL;
if (draw->pt.front.vsplit) {
draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
draw->pt.front.vsplit = NULL;
}
}
@@ -221,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
uint ii = 0;
uint j;
if (draw->pt.user.elts) {
if (draw->pt.user.eltSize) {
const char *elts;
/* indexed arrays */
elts = (const char *) draw->pt.user.elts;
elts += draw->pt.index_buffer.offset;
switch (draw->pt.user.eltSize) {
case 1:
{
const ubyte *elem = (const ubyte *) draw->pt.user.elts;
const ubyte *elem = (const ubyte *) elts;
ii = elem[start + i];
}
break;
case 2:
{
const ushort *elem = (const ushort *) draw->pt.user.elts;
const ushort *elem = (const ushort *) elts;
ii = elem[start + i];
}
break;
case 4:
{
const uint *elem = (const uint *) draw->pt.user.elts;
const uint *elem = (const uint *) elts;
ii = elem[start + i];
}
break;
@@ -324,17 +300,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
/**
* Draw vertex arrays.
* This is the main entrypoint into the drawing module.
* If drawing an indexed primitive, the draw_set_mapped_element_buffer_range()
* function should have already been called to specify the element/index buffer
* information.
*
* \param prim one of PIPE_PRIM_x
* \param start index of first vertex to draw
* \param count number of vertices to draw
* \param startInstance number for the first primitive instance (usually 0).
* \param instanceCount number of instances to draw (1=non-instanced)
* Instanced drawing.
* \sa draw_vbo
*/
void
draw_arrays_instanced(struct draw_context *draw,
@@ -344,10 +311,50 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount)
{
unsigned reduced_prim = u_reduced_prim(mode);
struct pipe_draw_info info;
util_draw_init_info(&info);
info.mode = mode;
info.start = start;
info.count = count;
info.start_instance = startInstance;
info.instance_count = instanceCount;
info.indexed = (draw->pt.user.elts != NULL);
if (!info.indexed) {
info.min_index = start;
info.max_index = start + count - 1;
}
draw_vbo(draw, &info);
}
/**
* Draw vertex arrays.
* This is the main entrypoint into the drawing module. If drawing an indexed
* primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
* functions should have already been called to specify the element/index
* buffer information.
*/
void
draw_vbo(struct draw_context *draw,
const struct pipe_draw_info *info)
{
unsigned reduced_prim = u_reduced_prim(info->mode);
unsigned instance;
assert(instanceCount > 0);
assert(info->instance_count > 0);
if (info->indexed)
assert(draw->pt.user.elts);
draw->pt.user.eltSize =
(info->indexed) ? draw->pt.index_buffer.index_size : 0;
draw->pt.user.eltBias = info->index_bias;
draw->pt.user.min_index = info->min_index;
draw->pt.user.max_index = info->max_index;
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
@@ -355,8 +362,8 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
mode, start, count);
debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
info->mode, info->start, info->count);
if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
@@ -384,10 +391,10 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
draw_print_arrays(draw, mode, start, MIN2(count, 20));
draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
for (instance = 0; instance < instanceCount; instance++) {
draw->instance_id = instance + startInstance;
draw_pt_arrays(draw, mode, start, count);
for (instance = 0; instance < info->instance_count; instance++) {
draw->instance_id = instance + info->start_instance;
draw_pt_arrays(draw, info->mode, info->start, info->count);
}
}
+26 -25
View File
@@ -35,8 +35,6 @@
#include "pipe/p_compiler.h"
typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
struct draw_pt_middle_end;
struct draw_context;
struct draw_prim_info;
@@ -52,13 +50,18 @@ struct draw_vertex_info;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
* Currently one version of this:
* - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
* Later:
* - varray, varray_split
* - velement, velement_split
* The fetch elements are indices to the vertices. The draw elements are
* indices to the fetched vertices. When both arrays of elements are
* linear, middle->run_linear is called; when only the fetch elements are
* linear, middle->run_linear_elts is called; otherwise, middle->run is
* called.
*
* Currently only using the vcache version.
* When the number of the draw elements exceeds max_vertex of the middle end,
* the draw elements (as well as the fetch elements) are split and the
* middle end is called multiple times.
*
* Currently there is:
* - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -67,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
pt_elt_func elt_func,
const void *elt_ptr,
int elt_bias,
unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -80,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
* prim_flags is as defined by pipe_draw_info::flags.
*
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -94,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count );
unsigned draw_count,
unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
unsigned count);
unsigned count,
unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -107,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count );
unsigned draw_count,
unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -122,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
/* Helper functions.
*/
pt_elt_func draw_pt_elt_func( struct draw_context *draw );
const void *draw_pt_elt_ptr( struct draw_context *draw,
unsigned start );
/* Frontends:
*
* Currently only the general-purpose vcache implementation, could add
* a special case for tiny vertex buffers.
* Currently only the general-purpose vsplit implementation.
*/
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -223,7 +221,9 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
boolean bypass_clipping,
boolean clip_xy,
boolean clip_z,
boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags );
@@ -237,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
-89
View File
@@ -1,89 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "draw/draw_pt.h"
#include "draw/draw_private.h"
/* Neat get_elt func that also works for varrays drawing by encoding
* the start value into a pointer.
*/
/* Fetch entry idx from an index buffer whose entries are stored as uint. */
static unsigned elt_uint( const void *elts, unsigned idx )
{
   const uint *indices = (const uint *)elts;

   return indices[idx];
}
/* Fetch entry idx from an index buffer whose entries are stored as ushort. */
static unsigned elt_ushort( const void *elts, unsigned idx )
{
   const ushort *indices = (const ushort *)elts;

   return indices[idx];
}
/* Fetch entry idx from an index buffer whose entries are stored as ubyte. */
static unsigned elt_ubyte( const void *elts, unsigned idx )
{
   const ubyte *indices = (const ubyte *)elts;

   return indices[idx];
}
/* Non-indexed case: "elts" is not dereferenced.  Its pointer value carries
 * an unsigned start index, and the result is that index plus idx.
 */
static unsigned elt_vert( const void *elts, unsigned idx )
{
   const uintptr_t packed_start = (uintptr_t)elts;

   return (unsigned)packed_start + idx;
}
/* Select the index-fetch helper that matches draw->pt.user.eltSize.
 * Size 0 selects the helper that decodes the start value from the pointer;
 * sizes other than 0, 1, 2 and 4 yield NULL.
 */
pt_elt_func draw_pt_elt_func( struct draw_context *draw )
{
   pt_elt_func func = NULL;

   switch (draw->pt.user.eltSize) {
   case 0:
      func = &elt_vert;
      break;
   case 1:
      func = &elt_ubyte;
      break;
   case 2:
      func = &elt_ushort;
      break;
   case 4:
      func = &elt_uint;
      break;
   default:
      break;
   }

   return func;
}
/* Return the "element pointer" that goes with the helper chosen by
 * draw_pt_elt_func().
 *
 * For element sizes 1, 2 and 4 this is the address of entry "start" in the
 * user index buffer.  For element size 0 there is no buffer: "start" itself
 * is encoded in the pointer value and decoded again by elt_vert().  Any
 * other element size yields NULL.
 */
const void *draw_pt_elt_ptr( struct draw_context *draw,
                             unsigned start )
{
   const char *elts = draw->pt.user.elts;

   switch (draw->pt.user.eltSize) {
   case 0:
      /* Encode through uintptr_t, mirroring the decode in elt_vert().
       * Offsetting a NULL pointer is undefined behaviour.
       */
      return (const void *)(uintptr_t)start;
   case 1:
      return (const void *)(((const ubyte *)elts) + start);
   case 2:
      return (const void *)(((const ushort *)elts) + start);
   case 4:
      return (const void *)(((const uint *)elts) + start);
   default:
      return NULL;
   }
}
-11
View File
@@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
/* even number */
*max_vertices = *max_vertices & ~1;
}
@@ -147,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
if (vertex_count == 0)
return;
if (vertex_count >= UNDEFINED_VERTEX_ID) {
assert(0);
return;
}
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -226,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (count >= UNDEFINED_VERTEX_ID)
goto fail;
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
/* Return an even number of verts.
* This prevents "parity" errors when splitting long triangle strips which
* can lead to front/back culling mix-ups.
* Every other triangle in a strip has an alternate front/back orientation
* so splitting at an odd position can cause the orientation of subsequent
* triangles to get reversed.
*/
*max_vertices = *max_vertices & ~1;
}
@@ -210,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -220,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (fetch_count >= UNDEFINED_VERTEX_ID) {
assert(0);
return;
}
draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
@@ -273,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count )
unsigned count,
unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -283,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (count >= UNDEFINED_VERTEX_ID)
goto fail;
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -334,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -344,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (count >= UNDEFINED_VERTEX_ID)
return FALSE;
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -102,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
fse->key.clip = !draw->bypass_clipping;
fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
@@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
/* Return an even number of verts.
* This prevents "parity" errors when splitting long triangle strips which
* can lead to front/back culling mix-ups.
* Every other triangle in a strip has an alternate front/back orientation
* so splitting at an odd position can cause the orientation of subsequent
* triangles to get reversed.
*/
*max_vertices = *max_vertices & ~1;
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -197,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count )
unsigned count,
unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -207,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (count >= UNDEFINED_VERTEX_ID)
goto fail;
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -265,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -275,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (fetch_count >= UNDEFINED_VERTEX_ID)
goto fail;
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)fetch_count ))
@@ -327,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -337,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
if (count >= UNDEFINED_VERTEX_ID)
return FALSE;
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -100,8 +100,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
(boolean)draw->bypass_clipping,
(boolean)draw->identity_viewport,
draw->clip_xy,
draw->clip_z,
draw->clip_user,
draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -112,16 +114,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
gs_out_prim,
max_vertices );
*max_vertices = MAX2( *max_vertices,
DRAW_PIPE_MAX_VERTICES );
*max_vertices = MAX2( *max_vertices, 4096 );
}
else {
*max_vertices = DRAW_PIPE_MAX_VERTICES;
/* limit max fetches by limiting max_vertices */
*max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
@@ -295,7 +294,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -311,6 +311,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -320,7 +321,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count)
unsigned count,
unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -336,6 +338,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +351,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +368,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -66,7 +66,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
struct draw_context *draw = fpme->draw;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(draw->vs.vertex_shader);
struct draw_llvm_variant_key key;
char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
struct draw_llvm_variant_key *key;
struct draw_llvm_variant *variant = NULL;
struct draw_llvm_variant_list_item *li;
unsigned i;
@@ -106,8 +107,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
(boolean)draw->bypass_clipping,
(boolean)(draw->identity_viewport),
draw->clip_xy,
draw->clip_z,
draw->clip_user,
draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -118,21 +121,21 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
out_prim,
max_vertices );
*max_vertices = MAX2( *max_vertices,
DRAW_PIPE_MAX_VERTICES );
*max_vertices = MAX2( *max_vertices, 4096 );
}
else {
*max_vertices = DRAW_PIPE_MAX_VERTICES;
/* limit max fetches by limiting max_vertices */
*max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
draw_llvm_make_variant_key(fpme->llvm, &key);
key = draw_llvm_make_variant_key(fpme->llvm, store);
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
if(memcmp(&li->base->key, &key, sizeof key) == 0) {
if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
@@ -155,7 +158,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
}
variant = draw_llvm_create_variant(fpme->llvm, nr);
variant = draw_llvm_create_variant(fpme->llvm, nr, key);
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
@@ -294,7 +297,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -310,6 +314,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -319,7 +324,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count)
unsigned count,
unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -335,6 +341,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +355,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
unsigned draw_count )
unsigned draw_count,
unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +372,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
+104 -190
View File
@@ -26,14 +26,26 @@
**************************************************************************/
#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
#define DO_CLIP_XY 0x1
#define DO_CLIP_FULL_Z 0x2
#define DO_CLIP_HALF_Z 0x4
#define DO_CLIP_USER 0x8
#define DO_VIEWPORT 0x10
#define DO_EDGEFLAG 0x20
/* State of the post-vertex-shader stage. */
struct pt_post_vs {
struct draw_context *draw;
/* Bitmask of DO_* flags, rebuilt by draw_pt_post_vs_prepare(). */
unsigned flags;
/* Per-batch routine, chosen by draw_pt_post_vs_prepare() from "flags". */
boolean (*run)( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
};
@@ -56,186 +68,47 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
static INLINE unsigned
compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr,
boolean clip_depth)
{
unsigned mask = 0x0;
unsigned i;
#define FLAGS (0)
#define TAG(x) x##_none
#include "draw_cliptest_tmp.h"
#if 0
debug_printf("compute clipmask %f %f %f %f\n",
clip[0], clip[1], clip[2], clip[3]);
assert(clip[3] != 0.0);
#endif
#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
#define TAG(x) x##_xy_fullz_viewport
#include "draw_cliptest_tmp.h"
/* Do the hardwired planes first:
*/
if (-clip[0] + clip[3] < 0) mask |= (1<<0);
if ( clip[0] + clip[3] < 0) mask |= (1<<1);
if (-clip[1] + clip[3] < 0) mask |= (1<<2);
if ( clip[1] + clip[3] < 0) mask |= (1<<3);
if (clip_depth) {
if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
}
#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
#define TAG(x) x##_xy_halfz_viewport
#include "draw_cliptest_tmp.h"
/* Followed by any remaining ones:
*/
for (i = 6; i < nr; i++) {
if (dot4(clip, plane[i]) < 0)
mask |= (1<<i);
}
#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
#define TAG(x) x##_fullz_viewport
#include "draw_cliptest_tmp.h"
return mask;
}
#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
#define TAG(x) x##_halfz_viewport
#include "draw_cliptest_tmp.h"
#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
#define TAG(x) x##_xy_fullz_user_viewport
#include "draw_cliptest_tmp.h"
#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
#include "draw_cliptest_tmp.h"
/* The normal case - cliptest, rhw divide, viewport transform.
*
* Also handle identity viewport here at the expense of a few wasted
* instructions
/* Don't want to create 64 versions of this function, so catch the
* less common ones here. This is looking like something which should
* be code-generated, perhaps appended to the end of the vertex
* shader.
*/
/* Clip-test every vertex in "info".  Vertices whose clipmask is zero also
 * get the 1/w divide and viewport scale/translate applied to their position
 * in place.  Returns TRUE when at least one vertex has a non-zero clipmask.
 */
static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
struct draw_vertex_info *info )
{
struct vertex_header *out = info->verts;
const float *scale = pvs->draw->viewport.scale;
const float *trans = pvs->draw->viewport.translate;
const unsigned pos = draw_current_shader_position_output(pvs->draw);
unsigned clipped = 0;
unsigned j;
if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count);
for (j = 0; j < info->count; j++) {
float *position = out->data[pos];
initialize_vertex_header(out);
#if 0
debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
j, out, position, position[0], position[1], position[2], position[3]);
#endif
/* Keep an untransformed copy of the position in the header. */
out->clip[0] = position[0];
out->clip[1] = position[1];
out->clip[2] = position[2];
out->clip[3] = position[3];
out->vertex_id = 0xffff;
/* Disable depth clipping if depth clamping is enabled. */
out->clipmask = compute_clipmask_gl(out->clip,
pvs->draw->plane,
pvs->draw->nr_planes,
!pvs->draw->depth_clamp);
/* Accumulated only to learn whether any mask was non-zero. */
clipped += out->clipmask;
if (out->clipmask == 0)
{
/* divide by w */
float w = 1.0f / position[3];
/* Viewport mapping */
position[0] = position[0] * w * scale[0] + trans[0];
position[1] = position[1] * w * scale[1] + trans[1];
position[2] = position[2] * w * scale[2] + trans[2];
position[3] = w;
#if 0
debug_printf("post viewport: %f %f %f %f\n",
position[0],
position[1],
position[2],
position[3]);
#endif
}
/* Step to the next vertex; vertices are info->stride bytes apart. */
out = (struct vertex_header *)( (char *)out + info->stride );
}
return clipped != 0;
}
#define FLAGS (pvs->flags)
#define TAG(x) x##_generic
#include "draw_cliptest_tmp.h"
/* As above plus edgeflags
*/
/* Run post_vs_cliptest_viewport_gl(), then copy the vertex shader's
 * edgeflag output (when there is one) into each vertex header.  Returns
 * TRUE if the cliptest pass returned TRUE or any vertex has its edgeflag
 * cleared.
 */
static boolean
post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
                                      struct draw_vertex_info *info)
{
   boolean needpipe = post_vs_cliptest_viewport_gl(pvs, info);
   struct vertex_header *vert;
   unsigned n;
   int slot;

   /* No edgeflag output: leave the headers as they are. */
   if (!pvs->draw->vs.edgeflag_output)
      return needpipe;

   slot = pvs->draw->vs.edgeflag_output;
   vert = info->verts;

   for (n = 0; n < info->count; n++) {
      const float *edgeflag = vert->data[slot];

      /* Only an exact 1.0 counts as "edge set". */
      vert->edgeflag = (edgeflag[0] == 1.0f);
      needpipe |= !vert->edgeflag;

      vert = (struct vertex_header *)( (char *)vert + info->stride );
   }

   return needpipe;
}
/* If bypass_clipping is set, skip cliptest and rhw divide.
*/
/* Viewport mapping only: scale and translate x, y and z of every vertex
 * position in place, with no cliptest and no divide by w.
 * Always returns FALSE.
 */
static boolean post_vs_viewport( struct pt_post_vs *pvs,
                                 struct draw_vertex_info *info )
{
   struct vertex_header *vert = info->verts;
   const float *scale = pvs->draw->viewport.scale;
   const float *trans = pvs->draw->viewport.translate;
   const unsigned pos = draw_current_shader_position_output(pvs->draw);
   unsigned n, axis;

   if (0) debug_printf("%s\n", __FUNCTION__);

   for (n = 0; n < info->count; n++) {
      float *position = vert->data[pos];

      initialize_vertex_header(vert);

      /* w (position[3]) is left untouched */
      for (axis = 0; axis < 3; axis++)
         position[axis] = position[axis] * scale[axis] + trans[axis];

      vert = (struct vertex_header *)((char *)vert + info->stride);
   }

   return FALSE;
}
/* If bypass_clipping is set and we have an identity viewport, nothing
* to do.
*/
/* Nothing to clip or transform: only (re)initialize the header of every
 * vertex.  Always returns FALSE.
 */
static boolean post_vs_none( struct pt_post_vs *pvs,
                             struct draw_vertex_info *info )
{
   struct vertex_header *vert = info->verts;
   unsigned n = 0;

   if (0) debug_printf("%s\n", __FUNCTION__);

   while (n < info->count) {
      initialize_vertex_header(vert);
      vert = (struct vertex_header *)((char *)vert + info->stride);
      n++;
   }

   return FALSE;
}
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info )
{
@@ -244,31 +117,72 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
boolean bypass_clipping,
boolean clip_xy,
boolean clip_z,
boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags )
{
if (!need_edgeflags) {
if (bypass_clipping) {
if (bypass_viewport)
pvs->run = post_vs_none;
else
pvs->run = post_vs_viewport;
}
else {
/* if (opengl) */
pvs->run = post_vs_cliptest_viewport_gl;
}
pvs->flags = 0;
if (clip_xy)
pvs->flags |= DO_CLIP_XY;
if (clip_z && opengl) {
pvs->flags |= DO_CLIP_FULL_Z;
ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 );
}
else {
/* If we need to copy edgeflags to the vertex header, it should
* mean we're running the primitive pipeline. Hence the bypass
* flags should be false.
*/
assert(!bypass_clipping);
assert(!bypass_viewport);
pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
if (clip_z && !opengl) {
pvs->flags |= DO_CLIP_HALF_Z;
ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 );
}
if (clip_user)
pvs->flags |= DO_CLIP_USER;
if (!bypass_viewport)
pvs->flags |= DO_VIEWPORT;
if (need_edgeflags)
pvs->flags |= DO_EDGEFLAG;
/* Now select the relevant function:
*/
switch (pvs->flags) {
case 0:
pvs->run = do_cliptest_none;
break;
case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
pvs->run = do_cliptest_xy_fullz_viewport;
break;
case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
pvs->run = do_cliptest_xy_halfz_viewport;
break;
case DO_CLIP_FULL_Z | DO_VIEWPORT:
pvs->run = do_cliptest_fullz_viewport;
break;
case DO_CLIP_HALF_Z | DO_VIEWPORT:
pvs->run = do_cliptest_halfz_viewport;
break;
case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
pvs->run = do_cliptest_xy_fullz_user_viewport;
break;
case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
DO_VIEWPORT | DO_EDGEFLAG):
pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
break;
default:
pvs->run = do_cliptest_generic;
break;
}
}
+1 -1
View File
@@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
#define FUNC so_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
#define GET_ELT(idx) (elts[start + (idx)])
#include "draw_so_emit_tmp.h"
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
/* Trim "count" down to the largest value of the form first + k * incr
 * (k >= 0) that does not exceed it.  Returns 0 when count is smaller
 * than "first".
 */
unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
{
   unsigned leftover;

   if (first > count)
      return 0;

   /* vertices past "first" that do not fill a whole "incr" step */
   leftover = (count - first) % incr;

   return count - leftover;
}
-200
View File
@@ -1,200 +0,0 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "util/u_math.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
#define FETCH_MAX 256
#define DRAW_MAX (FETCH_MAX+8)
/* State of the varray front end created by draw_pt_varray(). */
struct varray_frontend {
/* Must stay first: the object is handed out and cast back via &base. */
struct draw_pt_front_end base;
struct draw_context *draw;
/* Identity table (draw_elts[i] == i), filled once in draw_pt_varray(). */
ushort draw_elts[DRAW_MAX];
/* Scratch list of vertex indices built by the segment helpers. */
unsigned fetch_elts[FETCH_MAX];
/* Vertex limit reported by the middle end's prepare(). */
unsigned driver_fetch_max;
/* MIN2(FETCH_MAX, driver_fetch_max), computed in varray_prepare(). */
unsigned fetch_max;
/* Set by varray_prepare(), reset to NULL by varray_finish(). */
struct draw_pt_middle_end *middle;
unsigned input_prim;
/* decompose_prim[input_prim] */
unsigned output_prim;
};
static void varray_flush_linear(struct varray_frontend *varray,
unsigned start, unsigned count)
{
if (count) {
assert(varray->middle->run_linear);
varray->middle->run_linear(varray->middle, start, count);
}
}
/* Emit one segment of a line loop through the middle end's run() hook.
 *
 * The fetch list is the segment_count consecutive vertices starting at
 * start + segment_start.  When "end" is set, vertex "start" is appended
 * after them.  An empty segment is a no-op.
 */
static void varray_line_loop_segment(struct varray_frontend *varray,
                                     unsigned start,
                                     unsigned segment_start,
                                     unsigned segment_count,
                                     boolean end )
{
   const unsigned first_vertex = start + segment_start;
   unsigned nr;

   assert(segment_count < varray->fetch_max);

   if (segment_count == 0)
      return;

   for (nr = 0; nr < segment_count; nr++)
      varray->fetch_elts[nr] = first_vertex + nr;

   if (end)
      varray->fetch_elts[nr++] = start;

   assert(nr <= FETCH_MAX);

   varray->middle->run(varray->middle,
                       varray->fetch_elts,
                       nr,
                       varray->draw_elts, /* ie. linear */
                       nr);
}
/* Emit one segment of a fan through the middle end's run() hook.
 *
 * The fetch list is the segment_count consecutive vertices starting at
 * start + segment_start.  For any segment that does not begin at offset 0,
 * vertex "start" is placed in front of them.  Segments shorter than two
 * vertices are dropped.
 */
static void varray_fan_segment(struct varray_frontend *varray,
                               unsigned start,
                               unsigned segment_start,
                               unsigned segment_count )
{
   const unsigned first_vertex = start + segment_start;
   unsigned nr = 0;
   unsigned k;

   assert(segment_count < varray->fetch_max);

   if (segment_count < 2)
      return;

   if (segment_start != 0)
      varray->fetch_elts[nr++] = start;

   for (k = 0; k < segment_count; k++)
      varray->fetch_elts[nr++] = first_vertex + k;

   assert(nr <= FETCH_MAX);

   varray->middle->run(varray->middle,
                       varray->fetch_elts,
                       nr,
                       varray->draw_elts, /* ie. linear */
                       nr);
}
#define FUNC varray_run
#include "draw_pt_varray_tmp_linear.h"
/* Primitive handed to the middle end for each input primitive, indexed by
 * the input primitive (see varray_prepare()).  Every entry is listed in
 * order; the only remapping is the third one, which becomes a line strip.
 * The table is never written, hence const.
 */
static const unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
   PIPE_PRIM_POINTS,
   PIPE_PRIM_LINES,
   PIPE_PRIM_LINE_STRIP,        /* decomposed LINELOOP */
   PIPE_PRIM_LINE_STRIP,
   PIPE_PRIM_TRIANGLES,
   PIPE_PRIM_TRIANGLE_STRIP,
   PIPE_PRIM_TRIANGLE_FAN,
   PIPE_PRIM_QUADS,
   PIPE_PRIM_QUAD_STRIP,
   PIPE_PRIM_POLYGON,
   PIPE_PRIM_LINES_ADJACENCY,
   PIPE_PRIM_LINE_STRIP_ADJACENCY,
   PIPE_PRIM_TRIANGLES_ADJACENCY,
   PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
};
/* Front-end prepare hook: record the primitive and middle end for the
 * coming draw, prepare the middle end for the decomposed primitive, and
 * derive the per-batch fetch limit from what it reports.
 */
static void varray_prepare(struct draw_pt_front_end *frontend,
                           unsigned in_prim,
                           struct draw_pt_middle_end *middle,
                           unsigned opt)
{
   struct varray_frontend *varray = (struct varray_frontend *)frontend;
   unsigned out_prim;

   varray->base.run = varray_run;
   varray->input_prim = in_prim;

   assert(in_prim < Elements(decompose_prim));
   out_prim = decompose_prim[in_prim];
   varray->output_prim = out_prim;

   varray->middle = middle;
   middle->prepare(middle, out_prim, opt, &varray->driver_fetch_max);

   /* check that the max is even */
   assert((varray->driver_fetch_max & 1) == 0);

   /* never fetch more than the local scratch arrays can hold */
   if (FETCH_MAX < varray->driver_fetch_max)
      varray->fetch_max = FETCH_MAX;
   else
      varray->fetch_max = varray->driver_fetch_max;
}
static void varray_finish(struct draw_pt_front_end *frontend)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
varray->middle->finish(varray->middle);
varray->middle = NULL;
}
/* Front-end destroy hook: release the object allocated by draw_pt_varray(). */
static void varray_destroy(struct draw_pt_front_end *frontend)
{
FREE(frontend);
}
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
{
ushort i;
struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
if (varray == NULL)
return NULL;
varray->base.prepare = varray_prepare;
varray->base.run = NULL;
varray->base.finish = varray_finish;
varray->base.destroy = varray_destroy;
varray->draw = draw;
for (i = 0; i < DRAW_MAX; i++) {
varray->draw_elts[i] = i;
}
return &varray->base;
}
@@ -1,238 +0,0 @@
/**
 * Template body, named through the FUNC macro by the including file.
 *
 * Walks `count` consecutive vertices beginning at `start` and emits them
 * through the POINT / LINE / TRIANGLE / QUAD macros supplied by the
 * including file, according to varray->input_prim.
 *
 * The vertices are handled in batches of at most FETCH_MAX (rounded down to
 * a multiple of `incr`); after every batch fetch_init() and varray_flush()
 * are called.  The indices handed to the emit macros are relative to the
 * current batch.
 *
 * \param get_elt  not used by this function
 * \param elts     not dereferenced; its pointer value is converted to the
 *                 integer start vertex
 * \param count    number of vertices
 */
static void FUNC(struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
unsigned count)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
struct draw_context *draw = varray->draw;
/* the "elts" pointer carries the start vertex rather than an address */
unsigned start = (unsigned)elts;
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
unsigned i, j;
ushort flags;
unsigned first, incr;
varray->fetch_start = start;
/* first/incr are filled in by draw_pt_split_prim() for this primitive */
draw_pt_split_prim(varray->input_prim, &first, &incr);
#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
varray->input_prim,
start, count);
#endif
/* In every loop below, j counts the vertices consumed so far and i is
 * set to the number of vertices consumed by the current batch before the
 * "j += i" step runs.
 */
switch (varray->input_prim) {
case PIPE_PRIM_POINTS:
/* one point per vertex */
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i < end; i++) {
POINT(varray, i + 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_LINES:
/* one line per pair of vertices */
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+1 < end; i += 2) {
LINE(varray, DRAW_PIPE_RESET_STIPPLE,
i + 0, i + 1);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_LINE_LOOP:
/* Connected lines plus a closing line back to batch vertex 0.
 * NOTE(review): the closing line is emitted once per batch, not only
 * for the final batch -- confirm this is intended when count exceeds
 * FETCH_MAX.
 */
if (count >= 2) {
flags = DRAW_PIPE_RESET_STIPPLE;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 1; i < end; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
}
LINE(varray, flags, i - 1, 0);
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
break;
case PIPE_PRIM_LINE_STRIP:
/* connected lines; the stipple-reset flag is only passed for the first
 * line emitted
 */
flags = DRAW_PIPE_RESET_STIPPLE;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 1; i < end; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLES:
/* one triangle per three vertices */
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i += 3) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1, i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
/* The two branches differ only in which vertex stays in its slot:
 * with flatfirst the first vertex (i + 0) is kept first, otherwise the
 * last vertex (i + 2) is kept last.  The (i&1) terms swap the other two
 * vertices on odd triangles.
 */
if (flatfirst) {
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1 + (i&1), i + 2 - (i&1));
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
/* more input follows: step back two vertices so the next batch
 * starts with the last two vertices of this one
 */
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
}
else {
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i + 2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
/* same two-vertex step back as in the flatfirst branch */
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
}
break;
case PIPE_PRIM_TRIANGLE_FAN:
/* Triangles sharing batch vertex 0; with flatfirst the shared vertex is
 * passed last instead of first.
 */
if (count >= 3) {
if (flatfirst) {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
else {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, 0, i + 1, i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
}
break;
case PIPE_PRIM_QUADS:
/* one quad per four vertices */
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+3 < end; i += 4) {
QUAD(varray, i + 0, i + 1, i + 2, i + 3);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_QUAD_STRIP:
/* one quad per additional pair of vertices */
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+3 < end; i += 2) {
QUAD(varray, i + 2, i + 0, i + 1, i + 3);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
/* more input follows: step back two vertices so the next batch
 * starts with the last two vertices of this one
 */
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
break;
case PIPE_PRIM_POLYGON:
{
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
/* after the first triangle only the middle edge flag is kept; the
 * triangle for which i + 3 == count also gets edge_last
 */
for (i = 0; i+2 < end; i++, flags = edge_middle) {
if (i + 3 == count)
flags |= edge_last;
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
break;
default:
assert(0);
break;
}
/* final flush after the whole primitive has been processed */
varray_flush(varray);
}
#undef TRIANGLE
#undef QUAD
#undef POINT
#undef LINE
#undef FUNC
@@ -1,103 +0,0 @@
/**
 * Round a vertex count down to the largest value of the form
 * first + k * incr that does not exceed it.
 *
 * The caller must pass count >= first: count either has been trimmed in
 * draw_pt_arrays or is set to (driver)_fetch_max, which is hopefully always
 * larger than first.
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   unsigned excess;

   assert(first <= count);

   /* vertices beyond the last complete primitive */
   excess = (count - first) % incr;

   return count - excess;
}
/**
 * Template body, named through the FUNC macro by the including file.
 *
 * Draws `count` consecutive vertices beginning at the start vertex, split
 * into pieces that the middle end can take.  Primitives that can be cut at
 * any primitive boundary go through varray_flush_linear(); line loops and
 * fans/polygons go through their dedicated segment helpers.
 *
 * \param get_elt   not used by this function
 * \param elts      not dereferenced; its pointer value carries the start
 *                  vertex
 * \param elt_bias  must be 0
 * \param count     number of vertices; counts shorter than the minimum for
 *                  the primitive are ignored
 */
static void FUNC(struct draw_pt_front_end *frontend,
                 pt_elt_func get_elt,
                 const void *elts,
                 int elt_bias,
                 unsigned count)
{
   struct varray_frontend *varray = (struct varray_frontend *)frontend;
   unsigned start = (unsigned) ((char *) elts - (char *) NULL);

   unsigned j;
   unsigned first, incr;

   assert(elt_bias == 0);

   draw_pt_split_prim(varray->input_prim, &first, &incr);

   /* Reject input that is too short for even one primitive BEFORE
    * trimming: trim() computes (count - first) in unsigned arithmetic,
    * which would wrap around and yield a huge bogus count.
    */
   if (count < first)
      return;

   /* Sanitize primitive length:
    */
   count = trim(count, first, incr);

#if 0
   debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
                varray->input_prim,
                start, count);
#endif

   switch (varray->input_prim) {
   case PIPE_PRIM_POINTS:
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_TRIANGLES:
   case PIPE_PRIM_LINE_STRIP:
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_QUADS:
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_LINES_ADJACENCY:
   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
   case PIPE_PRIM_TRIANGLES_ADJACENCY:
   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
      for (j = 0; j < count;) {
         unsigned remaining = count - j;
         unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
         varray_flush_linear(varray, start + j, nr);
         j += nr;
         /* not the last piece: back up so the next piece repeats the
          * (first - incr) vertices shared between consecutive primitives
          */
         if (nr != remaining)
            j -= (first - incr);
      }
      break;

   case PIPE_PRIM_LINE_LOOP:
      /* Always have to decompose as we've stated that this will be
       * emitted as a line-strip.
       */
      for (j = 0; j < count;) {
         unsigned remaining = count - j;
         unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
         /* the last argument tells the helper whether this is the final
          * segment
          */
         varray_line_loop_segment(varray, start, j, nr, nr == remaining);
         j += nr;
         if (nr != remaining)
            j -= (first - incr);
      }
      break;

   case PIPE_PRIM_POLYGON:
   case PIPE_PRIM_TRIANGLE_FAN:
      if (count < varray->driver_fetch_max) {
         /* fits in a single fetch: no splitting needed */
         varray_flush_linear(varray, start, count);
      }
      else {
         for ( j = 0; j < count;) {
            unsigned remaining = count - j;
            unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
            varray_fan_segment(varray, start, j, nr);
            j += nr;
            if (nr != remaining)
               j -= (first - incr);
         }
      }
      break;

   default:
      assert(0);
      break;
   }
}
#undef TRIANGLE
#undef QUAD
#undef POINT
#undef LINE
#undef FUNC
-610
View File
@@ -1,610 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
#define CACHE_MAX 256
#define FETCH_MAX 256
#define DRAW_MAX (16*1024)
/* State of the vcache front end: a small direct-mapped cache that collects
 * fetch elements and the draw elements referring to them until they are
 * flushed to the middle end.
 */
struct vcache_frontend {
struct draw_pt_front_end base;
struct draw_context *draw;
/* cache tags: in[felt % CACHE_MAX] holds the fetch element cached in
 * that slot (all bits set after a flush)
 */
unsigned in[CACHE_MAX];
/* position in fetch_elts[] of the element cached in the same slot */
ushort out[CACHE_MAX];
/* accumulated draw elements (fetch position, possibly OR'ed with flags) */
ushort draw_elts[DRAW_MAX];
/* accumulated fetch elements */
unsigned fetch_elts[FETCH_MAX];
/* number of valid entries in draw_elts[] / fetch_elts[] */
unsigned draw_count;
unsigned fetch_count;
/* written by the middle end's prepare() callback */
unsigned fetch_max;
struct draw_pt_middle_end *middle;
/* primitive passed to prepare, and the primitive emitted for it */
unsigned input_prim;
unsigned output_prim;
/* primitive the middle end was most recently prepared with */
unsigned middle_prim;
/* option flags passed to prepare; forwarded to the middle end */
unsigned opt;
};
static INLINE void
vcache_flush( struct vcache_frontend *vcache )
{
if (vcache->middle_prim != vcache->output_prim) {
vcache->middle_prim = vcache->output_prim;
vcache->middle->prepare( vcache->middle,
vcache->middle_prim,
vcache->opt,
&vcache->fetch_max );
}
if (vcache->draw_count) {
vcache->middle->run( vcache->middle,
vcache->fetch_elts,
vcache->fetch_count,
vcache->draw_elts,
vcache->draw_count );
}
memset(vcache->in, ~0, sizeof(vcache->in));
vcache->fetch_count = 0;
vcache->draw_count = 0;
}
static INLINE void
vcache_check_flush( struct vcache_frontend *vcache )
{
if (vcache->draw_count + 6 >= DRAW_MAX ||
vcache->fetch_count + 6 >= FETCH_MAX) {
vcache_flush( vcache );
}
}
/**
 * Append one vertex to the pending draw elements.
 *
 * The fetch element is looked up in the direct-mapped cache; on a miss it
 * is appended to fetch_elts[] and the cache slot is updated.  The draw
 * element written is the vertex's position in fetch_elts[] OR'ed with
 * `flags`.
 *
 * NOTE(review): the cache is reset to all-ones, so a fetch element equal to
 * 0xffffffff would compare equal to an empty slot -- confirm that value can
 * never be passed here.
 */
static INLINE void
vcache_elt( struct vcache_frontend *vcache,
            unsigned felt,
            ushort flags )
{
   const unsigned slot = felt % CACHE_MAX;

   if (vcache->in[slot] != felt) {
      /* cache miss: fetch this vertex and remember where it went */
      const unsigned pos = vcache->fetch_count;

      assert(pos < FETCH_MAX);

      vcache->in[slot] = felt;
      vcache->out[slot] = (ushort) pos;
      vcache->fetch_elts[pos] = felt;
      vcache->fetch_count = pos + 1;
   }

   vcache->draw_elts[vcache->draw_count] = vcache->out[slot] | flags;
   vcache->draw_count++;
}
static INLINE void
vcache_triangle( struct vcache_frontend *vcache,
unsigned i0,
unsigned i1,
unsigned i2 )
{
vcache_elt(vcache, i0, 0);
vcache_elt(vcache, i1, 0);
vcache_elt(vcache, i2, 0);
vcache_check_flush(vcache);
}
/**
 * Emit a triangle whose first draw element carries `flags`, then flush if
 * the buffers are nearly full.
 */
static INLINE void
vcache_triangle_flags( struct vcache_frontend *vcache,
                       ushort flags,
                       unsigned i0,
                       unsigned i1,
                       unsigned i2 )
{
   const ushort no_flags = 0;

   /* only the first vertex of the primitive carries the flags */
   vcache_elt(vcache, i0, flags);
   vcache_elt(vcache, i1, no_flags);
   vcache_elt(vcache, i2, no_flags);

   vcache_check_flush(vcache);
}
static INLINE void
vcache_line( struct vcache_frontend *vcache,
unsigned i0,
unsigned i1 )
{
vcache_elt(vcache, i0, 0);
vcache_elt(vcache, i1, 0);
vcache_check_flush(vcache);
}
/**
 * Emit a line whose first draw element carries `flags`, then flush if the
 * buffers are nearly full.
 */
static INLINE void
vcache_line_flags( struct vcache_frontend *vcache,
                   ushort flags,
                   unsigned i0,
                   unsigned i1 )
{
   const ushort no_flags = 0;

   vcache_elt(vcache, i0, flags);
   vcache_elt(vcache, i1, no_flags);

   vcache_check_flush(vcache);
}
static INLINE void
vcache_point( struct vcache_frontend *vcache,
unsigned i0 )
{
vcache_elt(vcache, i0, 0);
vcache_check_flush(vcache);
}
static INLINE void
vcache_line_adj_flags( struct vcache_frontend *vcache,
unsigned flags,
unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
{
vcache_elt(vcache, a0, 0);
vcache_elt(vcache, i0, flags);
vcache_elt(vcache, i1, 0);
vcache_elt(vcache, a1, 0);
vcache_check_flush(vcache);
}
/**
 * Emit a line with adjacency (a0, i0, i1, a1) without flags, then flush if
 * the buffers are nearly full.
 */
static INLINE void
vcache_line_adj( struct vcache_frontend *vcache,
                 unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
{
   /* same element sequence as the flagged variant, with no flag bits set */
   vcache_line_adj_flags(vcache, 0, a0, i0, i1, a1);
}
static INLINE void
vcache_triangle_adj_flags( struct vcache_frontend *vcache,
unsigned flags,
unsigned i0, unsigned a0,
unsigned i1, unsigned a1,
unsigned i2, unsigned a2 )
{
vcache_elt(vcache, i0, flags);
vcache_elt(vcache, a0, 0);
vcache_elt(vcache, i1, 0);
vcache_elt(vcache, a1, 0);
vcache_elt(vcache, i2, 0);
vcache_elt(vcache, a2, 0);
vcache_check_flush(vcache);
}
/**
 * Emit a triangle with adjacency (i0, a0, i1, a1, i2, a2) without flags,
 * then flush if the buffers are nearly full.
 */
static INLINE void
vcache_triangle_adj( struct vcache_frontend *vcache,
                     unsigned i0, unsigned a0,
                     unsigned i1, unsigned a1,
                     unsigned i2, unsigned a2 )
{
   /* same element sequence as the flagged variant, with no flag bits set */
   vcache_triangle_adj_flags(vcache, 0, i0, a0, i1, a1, i2, a2);
}
/* At least for now, we're back to using a template include file for
* this. The two paths aren't too different though - it may be
* possible to reunify them.
*/
/* First instantiation: vcache_run_extras.  The emit macros forward the
 * flags argument to the *_flags helpers.
 */
#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1)
#define POINT(i0) vcache_point(vcache,i0)
#define LINE_ADJ(flags,a0,i0,i1,a1) \
vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
#define FUNC vcache_run_extras
#include "draw_pt_vcache_tmp.h"
/* Second instantiation: vcache_run.  The emit macros accept a flags
 * argument but drop it, calling the helpers that emit no flags.
 * NOTE(review): the macros are redefined here without a visible #undef;
 * presumably the template header undefines them -- confirm.
 */
#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1)
#define POINT(i0) vcache_point(vcache,i0)
#define LINE_ADJ(flags,a0,i0,i1,a1) \
vcache_line_adj(vcache,a0,i0,i1,a1)
#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
/**
 * Copy `count` 32-bit indices to `dest`, adding `delta` to each and
 * truncating the result to ushort.
 */
static INLINE void
rebase_uint_elts( const unsigned *src,
                  unsigned count,
                  int delta,
                  ushort *dest )
{
   const unsigned *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src + delta);
      src++;
      dest++;
   }
}
/**
 * Copy `count` 16-bit indices to `dest`, adding `delta` to each and
 * truncating the result to ushort.
 */
static INLINE void
rebase_ushort_elts( const ushort *src,
                    unsigned count,
                    int delta,
                    ushort *dest )
{
   const ushort *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src + delta);
      src++;
      dest++;
   }
}
/**
 * Copy `count` 8-bit indices to `dest`, adding `delta` to each and
 * truncating the result to ushort.
 */
static INLINE void
rebase_ubyte_elts( const ubyte *src,
                   unsigned count,
                   int delta,
                   ushort *dest )
{
   const ubyte *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src + delta);
      src++;
      dest++;
   }
}
/**
 * Copy `count` 32-bit indices to `dest`, truncating each to ushort.
 */
static INLINE void
translate_uint_elts( const unsigned *src,
                     unsigned count,
                     ushort *dest )
{
   const unsigned *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src);
      src++;
      dest++;
   }
}
/**
 * Copy `count` 16-bit indices to `dest` unchanged.
 */
static INLINE void
translate_ushort_elts( const ushort *src,
                       unsigned count,
                       ushort *dest )
{
   const ushort *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src);
      src++;
      dest++;
   }
}
/**
 * Copy `count` 8-bit indices to `dest`, widening each to ushort.
 */
static INLINE void
translate_ubyte_elts( const ubyte *src,
                      unsigned count,
                      ushort *dest )
{
   const ubyte *const end = src + count;

   while (src != end) {
      *dest = (ushort)(*src);
      src++;
      dest++;
   }
}
/* Disabled helper (compiled out by #if 0) mapping an index size of 1, 2 or
 * 4 bytes to an R8/R16/R32 UNORM pipe format, and anything else to
 * PIPE_FORMAT_NONE.  As written it reads draw->pt.user.eltSize although no
 * `draw` is declared in the function and the get_elt parameter is unused,
 * so it would need changes before it could be re-enabled.
 */
#if 0
static INLINE enum pipe_format
format_from_get_elt( pt_elt_func get_elt )
{
switch (draw->pt.user.eltSize) {
case 1: return PIPE_FORMAT_R8_UNORM;
case 2: return PIPE_FORMAT_R16_UNORM;
case 4: return PIPE_FORMAT_R32_UNORM;
default: return PIPE_FORMAT_NONE;
}
}
#endif
/**
* Check if any vertex attributes use instance divisors.
* Note that instance divisors complicate vertex fetching so we need
* to take the vcache path when they're in use.
*/
static boolean
any_instance_divisors(const struct draw_context *draw)
{
uint i;
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
uint div = draw->pt.vertex_element[i].instance_divisor;
if (div)
return TRUE;
}
return FALSE;
}
/**
 * Run callback installed by vcache_prepare() when PT_PIPELINE is not set.
 *
 * Tries to draw the whole index buffer with a single run_linear_elts()
 * call that fetches the vertex range [min_index, max_index] and uses the
 * index buffer, converted to ushort and rebased to min_index, as the draw
 * elements.  When a precondition fails or the middle end reports failure,
 * it falls back to vcache_run().
 *
 * \param elts        index buffer; element size is draw->pt.user.eltSize
 * \param elt_bias    bias added to min_index for the fetch start
 * \param draw_count  number of indices in elts
 */
static INLINE void
vcache_check_run( struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
int elt_bias,
unsigned draw_count )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
struct draw_context *draw = vcache->draw;
const unsigned min_index = draw->pt.user.min_index;
const unsigned max_index = draw->pt.user.max_index;
const unsigned index_size = draw->pt.user.eltSize;
unsigned fetch_count;
const ushort *transformed_elts;
/* temporary ushort copy of the indices; stays NULL when elts is used
 * directly
 */
ushort *storage = NULL;
boolean ok = FALSE;
/* debug: verify indexes are in range [min_index, max_index] */
if (0) {
unsigned i;
for (i = 0; i < draw_count; i++) {
if (index_size == 1) {
assert( ((const ubyte *) elts)[i] >= min_index);
assert( ((const ubyte *) elts)[i] <= max_index);
}
else if (index_size == 2) {
assert( ((const ushort *) elts)[i] >= min_index);
assert( ((const ushort *) elts)[i] <= max_index);
}
else {
assert(index_size == 4);
assert( ((const uint *) elts)[i] >= min_index);
assert( ((const uint *) elts)[i] <= max_index);
}
}
}
/* Note: max_index is frequently 0xffffffff so we have to be sure
* that any arithmetic involving max_index doesn't overflow!
*/
if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
goto fail;
if (any_instance_divisors(draw))
goto fail;
fetch_count = max_index + 1 - min_index;
if (0)
debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
vcache->fetch_max,
draw_count);
/* give up on the single-call path when the biased range is too large or
 * when more vertices would be fetched than there are indices
 */
if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
fetch_count >= UNDEFINED_VERTEX_ID ||
fetch_count > draw_count) {
if (0) debug_printf("fail\n");
goto fail;
}
/* this path hands the input primitive to the middle end, so re-prepare
 * if it was last prepared for a different one
 */
if (vcache->middle_prim != vcache->input_prim) {
vcache->middle_prim = vcache->input_prim;
vcache->middle->prepare( vcache->middle,
vcache->middle_prim,
vcache->opt,
&vcache->fetch_max );
}
/* applying the bias to min_index must not wrap around */
assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
(elt_bias < 0 && min_index + elt_bias < min_index));
if (min_index == 0 &&
index_size == 2) {
/* already ushort and zero-based: use the caller's buffer as is */
transformed_elts = (const ushort *)elts;
}
else {
storage = MALLOC( draw_count * sizeof(ushort) );
if (!storage)
goto fail;
if (min_index == 0) {
/* no rebasing needed, only conversion to ushort */
switch(index_size) {
case 1:
translate_ubyte_elts( (const ubyte *)elts,
draw_count,
storage );
break;
case 2:
translate_ushort_elts( (const ushort *)elts,
draw_count,
storage );
break;
case 4:
translate_uint_elts( (const uint *)elts,
draw_count,
storage );
break;
default:
/* unexpected index size: nothing is drawn */
assert(0);
FREE(storage);
return;
}
}
else {
/* convert to ushort and subtract min_index from every index */
switch(index_size) {
case 1:
rebase_ubyte_elts( (const ubyte *)elts,
draw_count,
0 - (int)min_index,
storage );
break;
case 2:
rebase_ushort_elts( (const ushort *)elts,
draw_count,
0 - (int)min_index,
storage );
break;
case 4:
rebase_uint_elts( (const uint *)elts,
draw_count,
0 - (int)min_index,
storage );
break;
default:
/* unexpected index size: nothing is drawn */
assert(0);
FREE(storage);
return;
}
}
transformed_elts = storage;
}
/* this condition was already established by the range check above */
if (fetch_count < UNDEFINED_VERTEX_ID)
ok = vcache->middle->run_linear_elts( vcache->middle,
min_index + elt_bias, /* start */
fetch_count,
transformed_elts,
draw_count );
FREE(storage);
if (ok)
return;
debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
fetch_count, draw_count);
/* fallback: let the cache-based path handle the draw */
fail:
vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
}
static void
vcache_prepare( struct draw_pt_front_end *frontend,
unsigned in_prim,
struct draw_pt_middle_end *middle,
unsigned opt )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
if (opt & PT_PIPELINE) {
vcache->base.run = vcache_run_extras;
}
else {
vcache->base.run = vcache_check_run;
}
/* VCache will always emit the reduced version of its input
* primitive, ie STRIP/FANS become TRIS, etc.
*
* This is not to be confused with what the GS might be up to,
* which is a separate issue.
*/
vcache->input_prim = in_prim;
switch (in_prim) {
case PIPE_PRIM_LINES_ADJACENCY:
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
break;
case PIPE_PRIM_TRIANGLES_ADJACENCY:
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
break;
default:
vcache->output_prim = u_reduced_prim(in_prim);
}
vcache->middle = middle;
vcache->opt = opt;
/* Have to run prepare here, but try and guess a good prim for
* doing so:
*/
vcache->middle_prim = (opt & PT_PIPELINE)
? vcache->output_prim : vcache->input_prim;
middle->prepare( middle,
vcache->middle_prim,
opt, &vcache->fetch_max );
}
static void
vcache_finish( struct draw_pt_front_end *frontend )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
vcache->middle->finish( vcache->middle );
vcache->middle = NULL;
}
/**
 * Front-end destroy callback: frees the front-end object.
 */
static void
vcache_destroy(struct draw_pt_front_end *frontend)
{
   FREE(frontend);
}
/**
 * Create the vcache front end for the given draw context.
 *
 * \return the new front end, or NULL if the allocation failed.  The run
 *         callback is left NULL here; vcache_prepare() installs it.
 */
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
{
   struct vcache_frontend *self = CALLOC_STRUCT( vcache_frontend );

   if (!self)
      return NULL;

   self->draw = draw;

   self->base.prepare = vcache_prepare;
   self->base.run = NULL;
   self->base.finish = vcache_finish;
   self->base.destroy = vcache_destroy;

   /* start with an empty cache: every tag is all-ones */
   memset(self->in, ~0, sizeof(self->in));

   return &self->base;
}
@@ -1,19 +0,0 @@
/* Parameters for the shared decompose template included at the end.
 *
 * FUNC_VARS  - parameter list of the generated run function
 * LOCAL_VARS - locals the template relies on: the vcache front end, its
 *              draw context, the input primitive, and last_vertex_last,
 *              which is FALSE only when both flatshade and flatshade_first
 *              are set on the rasterizer
 * GET_ELT    - fetches index `idx` through get_elt and adds elt_bias
 * FUNC_EXIT  - flushes the cache
 */
#define FUNC_VARS \
struct draw_pt_front_end *frontend, \
pt_elt_func get_elt, \
const void *elts, \
int elt_bias, \
unsigned count
#define LOCAL_VARS \
struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \
struct draw_context *draw = vcache->draw; \
const unsigned prim = vcache->input_prim; \
const boolean last_vertex_last = !(draw->rasterizer->flatshade && \
draw->rasterizer->flatshade_first);
#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
#include "draw_decompose_tmp.h"
+208
View File
@@ -0,0 +1,208 @@
/*
* Mesa 3-D graphics library
* Version: 7.9
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright (C) 2010 LunarG Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "util/u_math.h"
#include "util/u_memory.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
#define SEGMENT_SIZE 1024
#define MAP_SIZE 256
/* State of the vsplit front end, which passes the input primitive through
 * unchanged ("split only") and cuts large draws into segments.
 */
struct vsplit_frontend {
struct draw_pt_front_end base;
struct draw_context *draw;
/* primitive type passed to prepare; handed to the middle end as is */
unsigned prim;
struct draw_pt_middle_end *middle;
/* written by the middle end's prepare() callback */
unsigned max_vertices;
/* MIN2(SEGMENT_SIZE, max_vertices), set in vsplit_prepare() */
ushort segment_size;
/* buffers for splitting */
unsigned fetch_elts[SEGMENT_SIZE];
ushort draw_elts[SEGMENT_SIZE];
/* identity_draw_elts[i] == i, filled in once by draw_pt_vsplit() */
ushort identity_draw_elts[SEGMENT_SIZE];
struct {
/* map a fetch element to a draw element */
unsigned fetches[MAP_SIZE];
ushort draws[MAP_SIZE];
/* set once the fetch element 0xffffffff has been added (that value is
 * also what an empty fetches[] slot holds)
 */
boolean has_max_fetch;
/* number of valid entries in fetch_elts[] / draw_elts[] */
ushort num_fetch_elts;
ushort num_draw_elts;
} cache;
};
/**
 * Empty the fetch-to-draw cache: reset both element counters, clear the
 * 0xffffffff marker and set every fetches[] slot to all-ones.
 */
static void
vsplit_clear_cache(struct vsplit_frontend *vsplit)
{
   vsplit->cache.num_draw_elts = 0;
   vsplit->cache.num_fetch_elts = 0;
   vsplit->cache.has_max_fetch = FALSE;

   memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches));
}
static void
vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
{
vsplit->middle->run(vsplit->middle,
vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
vsplit->draw_elts, vsplit->cache.num_draw_elts, flags);
}
/**
 * Append one vertex to the current segment.
 *
 * The fetch element is looked up in the direct-mapped cache; on a miss it
 * is appended to fetch_elts[] and the slot is updated.  The draw element
 * appended is the position of the vertex in fetch_elts[].
 */
static INLINE void
vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
{
   const unsigned slot = fetch % MAP_SIZE;

   if (vsplit->cache.fetches[slot] != fetch) {
      /* miss: record where this fetch element is about to be stored */
      vsplit->cache.fetches[slot] = fetch;
      vsplit->cache.draws[slot] = vsplit->cache.num_fetch_elts;

      assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
      vsplit->fetch_elts[vsplit->cache.num_fetch_elts] = fetch;
      vsplit->cache.num_fetch_elts++;
   }

   vsplit->draw_elts[vsplit->cache.num_draw_elts] = vsplit->cache.draws[slot];
   vsplit->cache.num_draw_elts++;
}
/**
 * Same as vsplit_add_cache(), for fetch elements that may use the full
 * uint range.
 *
 * 0xffffffff is also the value of an empty cache slot, so the first time it
 * is seen its slot is overwritten with a different value to force a miss.
 */
static INLINE void
vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
{
   if (fetch == 0xffffffff) {
      if (!vsplit->cache.has_max_fetch) {
         const unsigned slot = fetch % MAP_SIZE;

         /* any value other than `fetch` makes the lookup below miss */
         vsplit->cache.fetches[slot] = fetch - 1;
         vsplit->cache.has_max_fetch = TRUE;
      }
   }

   vsplit_add_cache(vsplit, fetch);
}
/* Instantiate the run function template four times: once without ELT_TYPE
 * (vsplit_run_linear) and once per index type.  Only the uint variant uses
 * vsplit_add_cache_uint(), which special-cases the fetch element
 * 0xffffffff.
 * NOTE(review): FUNC, ELT_TYPE and ADD_CACHE are redefined between
 * includes without a visible #undef; presumably the template header
 * undefines them -- confirm.
 */
#define FUNC vsplit_run_linear
#include "draw_pt_vsplit_tmp.h"
#define FUNC vsplit_run_ubyte
#define ELT_TYPE ubyte
#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
#include "draw_pt_vsplit_tmp.h"
#define FUNC vsplit_run_ushort
#define ELT_TYPE ushort
#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
#include "draw_pt_vsplit_tmp.h"
#define FUNC vsplit_run_uint
#define ELT_TYPE uint
#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
#include "draw_pt_vsplit_tmp.h"
/**
 * Front-end prepare callback.
 *
 * Selects the run callback matching the current index size (0 means no
 * index buffer), records the primitive and the middle end, prepares the
 * middle end and derives the segment size from the limit it reports.
 */
static void vsplit_prepare(struct draw_pt_front_end *frontend,
                           unsigned in_prim,
                           struct draw_pt_middle_end *middle,
                           unsigned opt)
{
   struct vsplit_frontend *self = (struct vsplit_frontend *) frontend;
   const unsigned elt_size = self->draw->pt.user.eltSize;

   if (elt_size == 0)
      self->base.run = vsplit_run_linear;
   else if (elt_size == 1)
      self->base.run = vsplit_run_ubyte;
   else if (elt_size == 2)
      self->base.run = vsplit_run_ushort;
   else if (elt_size == 4)
      self->base.run = vsplit_run_uint;
   else
      assert(0);

   /* split only */
   self->prim = in_prim;

   self->middle = middle;
   middle->prepare(middle, in_prim, opt, &self->max_vertices);

   self->segment_size = MIN2(SEGMENT_SIZE, self->max_vertices);
}
static void vsplit_finish(struct draw_pt_front_end *frontend)
{
struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
vsplit->middle->finish(vsplit->middle);
vsplit->middle = NULL;
}
/**
 * Front-end destroy callback: frees the front-end object.
 */
static void
vsplit_destroy(struct draw_pt_front_end *frontend)
{
   FREE(frontend);
}
/**
 * Create the vsplit front end for the given draw context.
 *
 * \return the new front end, or NULL if the allocation failed.  The run
 *         callback is left NULL here; vsplit_prepare() installs it.
 */
struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
{
   struct vsplit_frontend *self = CALLOC_STRUCT(vsplit_frontend);
   ushort idx;

   if (self == NULL)
      return NULL;

   self->draw = draw;

   self->base.prepare = vsplit_prepare;
   self->base.run = NULL;
   self->base.finish = vsplit_finish;
   self->base.destroy = vsplit_destroy;

   /* identity mapping: draw element n refers to fetch element n */
   for (idx = 0; idx < SEGMENT_SIZE; idx++) {
      self->identity_draw_elts[idx] = idx;
   }

   return &self->base;
}
@@ -0,0 +1,309 @@
/*
* Mesa 3-D graphics library
* Version: 7.9
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright (C) 2010 LunarG Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* Token pasting in two steps, so that macro arguments such as ELT_TYPE are
 * expanded before being pasted onto the name.
 */
#define CONCAT2(name, elt_type) name ## elt_type
#define CONCAT(name, elt_type) CONCAT2(name, elt_type)
#ifdef ELT_TYPE
/**
* Fetch all elements in [min_index, max_index] with bias, and use the
* (rebased) index buffer as the draw elements.
*/
static boolean
CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
unsigned istart, unsigned icount)
{
struct draw_context *draw = vsplit->draw;
const ELT_TYPE *ib = (const ELT_TYPE *)
((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
const unsigned min_index = draw->pt.user.min_index;
const unsigned max_index = draw->pt.user.max_index;
const int elt_bias = draw->pt.user.eltBias;
unsigned fetch_start, fetch_count;
const ushort *draw_elts = NULL;
unsigned i;
/* use the ib directly */
if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
if (icount > vsplit->max_vertices)
return FALSE;
for (i = 0; i < icount; i++) {
ELT_TYPE idx = ib[istart + i];
assert(idx >= min_index && idx <= max_index);
}
draw_elts = (const ushort *) ib;
}
else {
/* have to go through vsplit->draw_elts */
if (icount > vsplit->segment_size)
return FALSE;
}
/* this is faster only when we fetch less elements than the normal path */
if (max_index - min_index > icount - 1)
return FALSE;
if (elt_bias < 0 && min_index < -elt_bias)
return FALSE;
/* why this check? */
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
if (draw->pt.vertex_element[i].instance_divisor)
return FALSE;
}
fetch_start = min_index + elt_bias;
fetch_count = max_index - min_index + 1;
if (!draw_elts) {
if (min_index == 0) {
for (i = 0; i < icount; i++) {
ELT_TYPE idx = ib[istart + i];
assert(idx >= min_index && idx <= max_index);
vsplit->draw_elts[i] = (ushort) idx;
}
}
else {
for (i = 0; i < icount; i++) {
ELT_TYPE idx = ib[istart + i];
assert(idx >= min_index && idx <= max_index);
vsplit->draw_elts[i] = (ushort) (idx - min_index);
}
}
draw_elts = vsplit->draw_elts;
}
return vsplit->middle->run_linear_elts(vsplit->middle,
fetch_start, fetch_count,
draw_elts, icount, 0x0);
}
/**
 * Use the cache to prepare the fetch and draw elements, and flush.
 *
 * Indices istart .. istart + icount - 1 of the index buffer are added to a
 * freshly cleared cache, each with the index bias applied.  When spoken is
 * TRUE, ispoken replaces istart (the first index); when close is TRUE,
 * iclose is appended.
 *
 * With a negative bias, an index smaller than the magnitude of the bias
 * makes the function return without flushing anything.
 */
static INLINE void
CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                        unsigned flags,
                                        unsigned istart, unsigned icount,
                                        boolean spoken, unsigned ispoken,
                                        boolean close, unsigned iclose)
{
   struct draw_context *draw = vsplit->draw;
   const ELT_TYPE *ib = (const ELT_TYPE *)
      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
   const int ibias = draw->pt.user.eltBias;
   /* position of the first index taken from istart: 1 when the spoken
    * index stands in for position 0
    */
   const unsigned first = spoken ? 1 : 0;
   unsigned k;

   assert(icount + !!close <= vsplit->segment_size);

   vsplit_clear_cache(vsplit);

   if (ibias < 0) {
      /* negative bias: refuse indices that would end up below zero */
      if (spoken) {
         if (ib[ispoken] < -ibias)
            return;
         ADD_CACHE(vsplit, ib[ispoken] + ibias);
      }

      for (k = first; k < icount; k++) {
         if (ib[istart + k] < -ibias)
            return;
         ADD_CACHE(vsplit, ib[istart + k] + ibias);
      }

      if (close) {
         if (ib[iclose] < -ibias)
            return;
         ADD_CACHE(vsplit, ib[iclose] + ibias);
      }
   }
   else if (ibias > 0) {
      if (spoken) {
         ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
      }

      for (k = first; k < icount; k++) {
         ADD_CACHE(vsplit, (uint) ib[istart + k] + ibias);
      }

      if (close) {
         ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
      }
   }
   else {
      /* no bias */
      if (spoken) {
         ADD_CACHE(vsplit, ib[ispoken]);
      }

      for (k = first; k < icount; k++) {
         ADD_CACHE(vsplit, ib[istart + k]);
      }

      if (close) {
         ADD_CACHE(vsplit, ib[iclose]);
      }
   }

   vsplit_flush_cache(vsplit, flags);
}
static void
CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
unsigned flags,
unsigned istart,
unsigned icount)
{
CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
flags, istart, icount, FALSE, 0, FALSE, 0);
}
/**
 * Draw a line-loop segment.  When flags is exactly DRAW_SPLIT_BEFORE the
 * index at i0 is appended to the segment.
 */
static void
CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                       unsigned flags,
                                       unsigned istart,
                                       unsigned icount,
                                       unsigned i0)
{
   const boolean use_spoken = FALSE;
   const boolean close_loop = (flags == DRAW_SPLIT_BEFORE);

   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, flags, istart, icount,
                                           use_spoken, 0, close_loop, i0);
}
/**
 * Draw a fan segment.  When the DRAW_SPLIT_BEFORE bit is set in flags, the
 * index at i0 takes the place of the segment's first index.
 */
static void
CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                      unsigned flags,
                                      unsigned istart,
                                      unsigned icount,
                                      unsigned i0)
{
   const boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
   const boolean close_loop = FALSE;

   CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, flags, istart, icount,
                                           use_spoken, i0, close_loop, 0);
}
/* Locals for the indexed variants.  All segment kinds are limited by
 * segment_size; loops get one element less.
 * NOTE(review): presumably the spare element is for the closing index the
 * loop helper may append -- confirm against the split template.
 */
#define LOCAL_VARS \
struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
const unsigned prim = vsplit->prim; \
const unsigned max_count_simple = vsplit->segment_size; \
const unsigned max_count_loop = vsplit->segment_size - 1; \
const unsigned max_count_fan = vsplit->segment_size;
/* try to draw the whole primitive at once through the fast path */
#define PRIMITIVE(istart, icount) \
CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount)
#else /* ELT_TYPE */
/**
 * Flush a plain segment of a non-indexed draw.
 *
 * The vertex range [istart, istart + icount) is handed unchanged to the
 * middle end's run_linear hook; icount must not exceed
 * vsplit->max_vertices.
 */
static void
vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
unsigned istart, unsigned icount)
{
assert(icount <= vsplit->max_vertices);
vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
}
/**
 * Flush one segment of a split, non-indexed line loop.
 *
 * When flags is exactly DRAW_SPLIT_BEFORE the segment has to be closed back
 * to vertex i0: the linear range is expanded into vsplit->fetch_elts, i0 is
 * appended, and the result is drawn through the indexed run hook using the
 * identity draw elements.  Every other segment is a plain linear run.
 */
static void
vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
                           unsigned istart, unsigned icount, unsigned i0)
{
   const boolean append_closing_vertex = (flags == DRAW_SPLIT_BEFORE);
   unsigned n = 0;

   assert(icount + !!append_closing_vertex <= vsplit->segment_size);

   if (!append_closing_vertex) {
      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
      return;
   }

   /* spell out the linear range as explicit fetch elements */
   while (n < icount) {
      vsplit->fetch_elts[n] = istart + n;
      n++;
   }

   /* the extra element closes the loop */
   vsplit->fetch_elts[n] = i0;
   n++;

   vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, n,
                       vsplit->identity_draw_elts, n, flags);
}
/**
 * Flush one segment of a split, non-indexed triangle fan / polygon.
 *
 * When the DRAW_SPLIT_BEFORE bit is set, the first fetch element is
 * replaced by i0 and the remaining ones are istart + 1 .. istart + icount - 1;
 * the segment is then drawn through the indexed run hook using the identity
 * draw elements.  Otherwise the range is drawn as a plain linear run.
 */
static void
vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
                          unsigned istart, unsigned icount, unsigned i0)
{
   const boolean replace_first = ((flags & DRAW_SPLIT_BEFORE) != 0);
   unsigned filled;

   assert(icount + !!replace_first <= vsplit->segment_size);

   if (!replace_first) {
      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
      return;
   }

   /* slot 0 holds i0 instead of istart */
   vsplit->fetch_elts[0] = i0;

   /* the rest of the segment is fetched linearly */
   for (filled = 1; filled < icount; filled++)
      vsplit->fetch_elts[filled] = istart + filled;

   vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, filled,
                       vsplit->identity_draw_elts, filled, flags);
}
/*
 * Locals consumed by the splitting template included below
 * (draw_split_tmp.h) for the non-indexed ("linear") variant.
 *
 * Plain segments are limited by vsplit->max_vertices, while loop and fan
 * segments are limited by vsplit->segment_size (one less for loops).
 * NOTE(review): presumably because loop/fan segments may be rewritten into
 * vsplit->fetch_elts, see the *_linear helpers -- confirm.
 */
#define LOCAL_VARS \
struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
const unsigned prim = vsplit->prim; \
const unsigned max_count_simple = vsplit->max_vertices; \
const unsigned max_count_loop = vsplit->segment_size - 1; \
const unsigned max_count_fan = vsplit->segment_size;
/* The linear variant has no whole-primitive fast path: always split. */
#define PRIMITIVE(istart, icount) FALSE
/* Suffix used when pasting the vsplit_segment_* helper names below. */
#define ELT_TYPE linear
#endif /* ELT_TYPE */
/* Parameter list of the function generated by draw_split_tmp.h. */
#define FUNC_VARS \
struct draw_pt_front_end *frontend, \
unsigned start, \
unsigned count
/*
 * Segment emitters used by the template; each one resolves to the
 * vsplit_segment_{simple,loop,fan}_<ELT_TYPE> helper and relies on a local
 * named vsplit being in scope (declared by LOCAL_VARS).
 */
#define SEGMENT_SIMPLE(flags, istart, icount) \
CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
#define SEGMENT_LOOP(flags, istart, icount, i0) \
CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
#define SEGMENT_FAN(flags, istart, icount, i0) \
CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
/* Instantiate the splitting function with the macros defined above. */
#include "draw_split_tmp.h"
/* Clean up so this file can be included again with another ELT_TYPE. */
#undef CONCAT2
#undef CONCAT
#undef ELT_TYPE
#undef ADD_CACHE
@@ -7,11 +7,9 @@
#define FUNC_ENTER \
/* declare more local vars */ \
struct draw_context *draw = so->draw; \
const unsigned prim = input_prims->prim; \
const boolean last_vertex_last = \
!(draw->rasterizer->flatshade && \
draw->rasterizer->flatshade_first); \
const unsigned prim_flags = input_prims->flags; \
const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
+176
View File
@@ -0,0 +1,176 @@
/*
* Mesa 3-D graphics library
* Version: 7.9
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright (C) 2010 LunarG Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
 * Template: split one primitive of count vertices starting at start into
 * segments small enough to be flushed, and flush each of them.
 *
 * The including file supplies FUNC (function name), FUNC_VARS (parameter
 * list), LOCAL_VARS (must define prim, max_count_simple, max_count_loop and
 * max_count_fan), PRIMITIVE (whole-primitive fast path) and the
 * SEGMENT_SIMPLE / SEGMENT_LOOP / SEGMENT_FAN emitters.
 *
 * Each emitted segment carries DRAW_SPLIT_BEFORE and/or DRAW_SPLIT_AFTER in
 * its flags to tell the emitter on which side(s) the primitive was cut.
 */
static void
FUNC(FUNC_VARS)
{
/* NOTE(review): presumably first = vertices of the first primitive and
 * incr = vertices per additional primitive -- confirm against
 * draw_pt_split_prim(). */
unsigned first, incr;
LOCAL_VARS
/*
* prim, start, count, and max_count_{simple,loop,fan} should have been
* defined
*/
if (0) {
debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
"max_count_loop %d, max_count_fan %d\n",
__FUNCTION__, prim, start, count, max_count_simple,
max_count_loop, max_count_fan);
}
draw_pt_split_prim(prim, &first, &incr);
/* sanitize primitive length */
count = draw_pt_trim_count(count, first, incr);
if (count < first)
return;
/* try flushing the entire primitive */
if (PRIMITIVE(start, count))
return;
/* must be able to at least flush two complete primitives */
assert(max_count_simple >= first + incr &&
max_count_loop >= first + incr &&
max_count_fan >= first + incr);
/* no splitting required */
if (count <= max_count_simple) {
SEGMENT_SIMPLE(0x0, start, count);
}
else {
/* number of vertices a segment shares with the one that follows it:
 * seg_start advances by seg_max - rollback after each full segment */
const unsigned rollback = first - incr;
/* the first segment is only cut after it; seg_start is relative to start */
unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
/*
* Both count and seg_max below are explicitly trimmed. Because
*
* seg_start = N * (seg_max - rollback) = N' * incr,
*
* we have
*
* remaining = count - seg_start = first + N'' * incr.
*
* That is, remaining is implicitly trimmed.
*/
switch (prim) {
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_LINE_STRIP:
case PIPE_PRIM_TRIANGLES:
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_QUADS:
case PIPE_PRIM_QUAD_STRIP:
case PIPE_PRIM_LINES_ADJACENCY:
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
case PIPE_PRIM_TRIANGLES_ADJACENCY:
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
seg_max =
draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
/* make sure we flush even number of triangles at a time */
if (seg_max < count && !(((seg_max - first) / incr) & 1))
seg_max -= incr;
}
do {
const unsigned remaining = count - seg_start;
if (remaining > seg_max) {
/* full segment; more will follow */
SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
seg_start += seg_max - rollback;
/* every later segment is also cut before it */
flags |= DRAW_SPLIT_BEFORE;
}
else {
/* last segment: nothing comes after it */
flags &= ~DRAW_SPLIT_AFTER;
SEGMENT_SIMPLE(flags, start + seg_start, remaining);
seg_start += remaining;
}
} while (seg_start < count);
break;
case PIPE_PRIM_LINE_LOOP:
seg_max =
draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
do {
const unsigned remaining = count - seg_start;
if (remaining > seg_max) {
/* the primitive's first vertex (start) is passed along as i0 */
SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
seg_start += seg_max - rollback;
flags |= DRAW_SPLIT_BEFORE;
}
else {
flags &= ~DRAW_SPLIT_AFTER;
SEGMENT_LOOP(flags, start + seg_start, remaining, start);
seg_start += remaining;
}
} while (seg_start < count);
break;
case PIPE_PRIM_TRIANGLE_FAN:
case PIPE_PRIM_POLYGON:
seg_max =
draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
do {
const unsigned remaining = count - seg_start;
if (remaining > seg_max) {
/* the primitive's first vertex (start) is passed along as i0 */
SEGMENT_FAN(flags, start + seg_start, seg_max, start);
seg_start += seg_max - rollback;
flags |= DRAW_SPLIT_BEFORE;
}
else {
flags &= ~DRAW_SPLIT_AFTER;
SEGMENT_FAN(flags, start + seg_start, remaining, start);
seg_start += remaining;
}
} while (seg_start < count);
break;
default:
/* unknown primitive type */
assert(0);
break;
}
}
}
#undef FUNC
#undef FUNC_VARS
#undef LOCAL_VARS
#undef PRIMITIVE
#undef SEGMENT_SIMPLE
#undef SEGMENT_LOOP
#undef SEGMENT_FAN
@@ -28,6 +28,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_screen.h"
#include "draw_private.h"
#include "draw_context.h"
@@ -109,6 +110,11 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
vs->variant_key_size =
draw_llvm_variant_key_size(
vs->base.info.file_max[TGSI_FILE_INPUT]+1,
vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
vs->base.run_linear = vs_llvm_run_linear;
+29 -19
View File
@@ -59,14 +59,6 @@
#include "lp_bld_arit.h"
/*
* XXX: Increasing eliminates some artifacts, but adds others, most
* noticeably corruption in the Earth halo in Google Earth.
*/
#define RCP_NEWTON_STEPS 0
#define RSQRT_NEWTON_STEPS 0
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
@@ -267,7 +259,7 @@ lp_build_add(struct lp_build_context *bld,
}
/** Return the sum of the elements of a */
/** Return the scalar sum of the elements of a */
LLVMValueRef
lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef a)
@@ -278,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld,
assert(lp_check_value(type, a));
if (a == bld->zero)
return bld->zero;
if (a == bld->undef)
return bld->undef;
assert(type.length > 1);
if (type.length == 1) {
return a;
}
assert(!bld->type.norm);
@@ -546,7 +536,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
if(util_is_pot(b)) {
if(util_is_power_of_two(b)) {
unsigned shift = ffs(b) - 1;
if(bld->type.floating) {
@@ -1266,6 +1256,11 @@ lp_build_sqrt(struct lp_build_context *bld,
*
* x_{i+1} = x_i * (2 - a * x_i)
*
* XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
* +/-Inf, giving NaN instead. Certain applications rely on the conformant
* behavior, such as Google Earth, which does RCP(RSQRT(0.0)) when drawing
* the Earth's halo. It would be necessary to clamp the argument to prevent
* this.
*
* See also:
* - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
* - http://softwarecommunity.intel.com/articles/eng/1818.htm
@@ -1306,13 +1301,27 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
/*
* We don't use RCPPS because:
* - it only has 10 bits of precision
* - it doesn't even get the reciprocal of 1.0 exactly
* - doing Newton-Raphson steps yields wrong (NaN) values for 0.0 or Inf
* - for recent processors the benefit over DIVPS is marginal and case
* dependent
*
* We could still use it on certain processors if benchmarks show that
* RCPPS plus the necessary workarounds is still preferable to DIVPS; or for
* particular uses that require fewer workarounds.
*/
if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
for (i = 0; i < num_iterations; ++i) {
res = lp_build_rcp_refine(bld, a, res);
}
@@ -1363,13 +1372,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
for (i = 0; i < num_iterations; ++i) {
res = lp_build_rsqrt_refine(bld, a, res);
}
+1 -1
View File
@@ -46,7 +46,7 @@
boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
assert(util_is_pot(alignment));
assert(util_is_power_of_two(alignment));
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
@@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
if (format_matches_type(format_desc, type) &&
format_desc->block.bits <= type.width * 4 &&
util_is_pot(format_desc->block.bits)) {
util_is_power_of_two(format_desc->block.bits)) {
LLVMValueRef packed;
/*
@@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
util_is_pot(format_desc->block.bits) &&
util_is_power_of_two(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
!format_desc->is_mixed &&
@@ -40,6 +40,7 @@
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/PrettyStackTrace.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -143,7 +144,6 @@ lp_set_target_options(void)
llvm::UnsafeFPMath = true;
#endif
#if 0
/*
* LLVM will generate MMX instructions for vectors <= 64 bits, leading to
inefficient code, and on 32-bit systems, to the corruption of the FPU
@@ -152,10 +152,8 @@ lp_set_target_options(void)
* See also:
* - http://llvm.org/bugs/show_bug.cgi?id=3287
* - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
*
* XXX: Unfortunately this is not working.
*/
static boolean first = FALSE;
static boolean first = TRUE;
if (first) {
static const char* options[] = {
"prog",
@@ -164,7 +162,13 @@ lp_set_target_options(void)
llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
first = FALSE;
}
#endif
/*
* By default LLVM adds a signal handler to output a pretty stack trace.
* This signal handler is never removed, causing problems when unloading the
* shared object where the gallium driver resides.
*/
llvm::DisablePrettyStackTrace = true;
}
@@ -37,6 +37,8 @@
#define LP_BLD_PACK_H
#include "pipe/p_compiler.h"
#include "gallivm/lp_bld.h"
+68 -35
View File
@@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->swizzle_a = view->swizzle_a;
state->target = texture->target;
state->pot_width = util_is_pot(texture->width0);
state->pot_height = util_is_pot(texture->height0);
state->pot_depth = util_is_pot(texture->depth0);
state->pot_width = util_is_power_of_two(texture->width0);
state->pot_height = util_is_power_of_two(texture->height0);
state->pot_depth = util_is_power_of_two(texture->depth0);
state->wrap_s = sampler->wrap_s;
state->wrap_t = sampler->wrap_t;
@@ -123,6 +123,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
}
/**
 * Split a pixel coordinate along one axis into a block offset and the
 * position of the pixel inside its block.
 *
 * The coordinate is divided by block_length (unsigned div/rem) to obtain
 * the block coordinate and the sub-block coordinate; the block coordinate
 * is then multiplied by stride.  When block_length is 1 no division is
 * emitted and the sub-block coordinate is simply zero.
 *
 * @param block_length  number of pixels in a pixel block along this axis
 * @param coord         coordinate in pixels
 * @param stride        multiplier applied to the block coordinate (the
 *                      distance in bytes between successive pixel blocks
 *                      along this axis)
 * @param out_offset    resulting relative offset of the pixel block
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMValueRef block_coord = coord;
   LLVMValueRef pixel_in_block = bld->zero;
   LLVMValueRef block_offset;

   if (block_length != 1) {
      /*
       * Block dimensions are powers of two, so LLVM is expected to lower
       * the rem/div pair to bit arithmetic.
       * TODO: Verify this.
       */
      LLVMValueRef divisor = lp_build_const_int_vec(bld->type, block_length);

      pixel_in_block = LLVMBuildURem(bld->builder, coord, divisor, "");
      block_coord = LLVMBuildUDiv(bld->builder, coord, divisor, "");
   }

   block_offset = lp_build_mul(bld, block_coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = block_offset;
   *out_subcoord = pixel_in_block;
}
/**
* Compute the offset of a pixel block.
*
@@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
{
LLVMValueRef x_stride;
LLVMValueRef offset;
LLVMValueRef i;
LLVMValueRef j;
/*
* Describe the coordinates in terms of pixel blocks.
*
* TODO: pixel blocks are power of two. LLVM should convert rem/div to
* bit arithmetic. Verify this.
*/
if (format_desc->block.width == 1) {
i = bld->zero;
}
else {
LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
i = LLVMBuildURem(bld->builder, x, block_width, "");
x = LLVMBuildUDiv(bld->builder, x, block_width, "");
}
if (format_desc->block.height == 1) {
j = bld->zero;
}
else {
LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
j = LLVMBuildURem(bld->builder, y, block_height, "");
y = LLVMBuildUDiv(bld->builder, y, block_height, "");
}
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
offset = lp_build_mul(bld, x, x_stride);
lp_build_sample_partial_offset(bld,
format_desc->block.width,
x, x_stride,
&offset, out_i);
if (y && y_stride) {
LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
LLVMValueRef y_offset;
lp_build_sample_partial_offset(bld,
format_desc->block.height,
y, y_stride,
&y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
else {
*out_j = bld->zero;
}
if (z && z_stride) {
LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
LLVMValueRef z_offset;
LLVMValueRef k;
lp_build_sample_partial_offset(bld,
1, /* pixel blocks are always 2D */
z, z_stride,
&z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
*out_offset = offset;
*out_i = i;
*out_j = j;
}
@@ -36,6 +36,8 @@
#define LP_BLD_SAMPLE_H
#include "pipe/p_format.h"
#include "gallivm/lp_bld.h"
struct pipe_resource;
@@ -146,6 +148,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
unsigned block_length,
LLVMValueRef coord,
LLVMValueRef stride,
LLVMValueRef *out_offset,
LLVMValueRef *out_i);
void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
+208 -93
View File
@@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex)
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return 2;
case PIPE_TEXTURE_3D:
@@ -321,59 +322,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
/**
 * Fetch the texels at (x, y) from mipmap level 0, returned as <4n x i8>
 * in AoS form.
 *
 * rgba8 variants are gathered straight from memory without swizzling;
 * every other format goes through the generic AoS fetch with an 8-bit
 * normalized type.
 */
static LLVMValueRef
lp_build_sample_packed(struct lp_build_sample_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef y_stride,
                       LLVMValueRef data_array)
{
   LLVMValueRef byte_offset, sub_x, sub_y;
   LLVMValueRef level0_ptr;
   struct lp_type aos_type;

   /* turn the x,y coords into a linear byte offset from the texture start */
   lp_build_sample_offset(&bld->uint_coord_bld,
                          bld->format_desc,
                          x, y, NULL, y_stride, NULL,
                          &byte_offset, &sub_x, &sub_y);

   /* base pointer of mipmap level 0 */
   level0_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /* read the pixels as they are; no swizzling here */
      assert(bld->format_desc->block.width == 1);
      assert(bld->format_desc->block.height == 1);
      assert(bld->format_desc->block.bits <= bld->texel_type.width);

      return lp_build_gather(bld->builder,
                             bld->texel_type.length,
                             bld->format_desc->block.bits,
                             bld->texel_type.width,
                             level0_ptr, byte_offset);
   }

   assert(bld->texel_type.width == 32);

   /* four 8-bit normalized channels per texel */
   memset(&aos_type, 0, sizeof aos_type);
   aos_type.width = 8;
   aos_type.length = bld->texel_type.length*4;
   aos_type.norm = TRUE;

   return lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, aos_type,
                                  level0_ptr, byte_offset, sub_x, sub_y);
}
/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
@@ -408,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
* We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
* We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
@@ -430,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
* \param i0 resulting sub-block pixel coordinate for coord0
*/
static LLVMValueRef
lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
boolean is_pot,
unsigned wrap_mode)
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
unsigned block_length,
LLVMValueRef coord,
LLVMValueRef length,
LLVMValueRef stride,
boolean is_pot,
unsigned wrap_mode,
LLVMValueRef *out_offset,
LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -469,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
assert(0);
}
return coord;
lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
out_offset, out_i);
}
/**
 * Build LLVM code for texture wrap mode, for scaled integer texcoords,
 * producing the offsets of the two neighbouring texels needed for linear
 * filtering along one axis.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled on
 * the single-pixel-block path; any other mode asserts and yields zero
 * offsets.
 *
 * \param block_length is the length of the pixel block along the
 * coordinate axis
 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length the texture size along one dimension
 * \param stride stride along the coordinate axis (multiplied by the wrapped
 * coordinate to obtain the offsets)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
unsigned block_length,
LLVMValueRef coord0,
LLVMValueRef length,
LLVMValueRef stride,
boolean is_pot,
unsigned wrap_mode,
LLVMValueRef *offset0,
LLVMValueRef *offset1,
LLVMValueRef *i0,
LLVMValueRef *i1)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
LLVMValueRef lmask, umask, mask;
if (block_length != 1) {
/*
* If the pixel block covers more than one pixel then there is no easy
* way to calculate offset1 relative to offset0. Instead, compute them
* independently.
*/
LLVMValueRef coord1;
lp_build_sample_wrap_nearest_int(bld,
block_length,
coord0,
length,
stride,
is_pot,
wrap_mode,
offset0, i0);
/* second texel: wrap coord0 + 1 on its own */
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
lp_build_sample_wrap_nearest_int(bld,
block_length,
coord1,
length,
stride,
is_pot,
wrap_mode,
offset1, i1);
return;
}
/*
* Scalar pixels -- try to compute offset0 and offset1 with a single stride
* multiplication.
*/
/* one pixel per block, so the sub-block coordinates are always zero */
*i0 = uint_coord_bld->zero;
*i1 = uint_coord_bld->zero;
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
if (is_pot) {
/* power-of-two size: wrapping is a simple bit mask */
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
}
else {
/* Signed remainder won't give the right results for negative
* dividends but unsigned remainder does. */
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
}
/* mask marks the lanes where coord0 is not the last texel.
 * NOTE(review): assumes lp_build_compare() yields an all-ones/all-zeros
 * lane mask -- confirm. */
mask = lp_build_compare(bld->builder, int_coord_bld->type,
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
*offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
/* next texel is one stride further, or offset 0 when coord0 was the
 * last texel (the AND with mask clears the whole offset) */
*offset1 = LLVMBuildAnd(bld->builder,
lp_build_add(uint_coord_bld, *offset0, stride),
mask, "");
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* lmask: coord0 >= 0; umask: coord0 < length - 1. Both are computed
 * from the unclamped coord0, before the selects below. */
lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
PIPE_FUNC_LESS, coord0, length_minus_one);
/* clamp coord0 to [0, length - 1] */
coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
/* only coordinates inside [0, length - 2] have a distinct next texel */
mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
*offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
/* add the stride only where mask is set; elsewhere offset1 == offset0 */
*offset1 = lp_build_add(uint_coord_bld,
*offset0,
LLVMBuildAnd(bld->builder, stride, mask, ""));
break;
case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_REPEAT:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
/* unsupported here; still hand back defined values in release builds */
assert(0);
*offset0 = uint_coord_bld->zero;
*offset1 = uint_coord_bld->zero;
break;
}
}
@@ -1740,16 +1820,21 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef x0, x1;
LLVMValueRef y0, y1;
LLVMValueRef neighbors[2][2];
LLVMValueRef data_ptr;
LLVMValueRef x_stride, y_stride;
LLVMValueRef x_offset0, x_offset1;
LLVMValueRef y_offset0, y_offset1;
LLVMValueRef offset[2][2];
LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
LLVMValueRef stride;
const unsigned level = 0;
unsigned i, j;
assert(bld->static_state->target == PIPE_TEXTURE_2D);
assert(bld->static_state->target == PIPE_TEXTURE_2D
|| bld->static_state->target == PIPE_TEXTURE_RECT);
assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
@@ -1793,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
x0 = s_ipart;
y0 = t_ipart;
x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
bld->format_desc->block.bits/8);
x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
bld->static_state->wrap_s);
y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
bld->static_state->wrap_t);
lp_build_sample_wrap_linear_int(bld,
bld->format_desc->block.width,
s_ipart, width, x_stride,
bld->static_state->pot_width,
bld->static_state->wrap_s,
&x_offset0, &x_offset1,
&x_subcoord[0], &x_subcoord[1]);
lp_build_sample_wrap_linear_int(bld,
bld->format_desc->block.height,
t_ipart, height, y_stride,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y_offset0, &y_offset1,
&y_subcoord[0], &y_subcoord[1]);
x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
bld->static_state->wrap_s);
y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
bld->static_state->wrap_t);
offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
@@ -1836,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
@@ -1864,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
/*
* get pointer to mipmap level 0 data
*/
data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -1883,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
for (j = 0; j < 2; ++j) {
for (i = 0; i < 2; ++i) {
LLVMValueRef rgba8;
neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
if (util_format_is_rgba8_variant(bld->format_desc)) {
/*
* Given the format is a rgba8, just read the pixels as is,
* without any swizzling. Swizzling will be done later.
*/
rgba8 = lp_build_gather(bld->builder,
bld->texel_type.length,
bld->format_desc->block.bits,
bld->texel_type.width,
data_ptr, offset[j][i]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
}
else {
rgba8 = lp_build_fetch_rgba_aos(bld->builder,
bld->format_desc,
u8n.type,
data_ptr, offset[j][i],
x_subcoord[i],
y_subcoord[j]);
}
lp_build_unpack2(builder, u8n.type, h16.type,
rgba8,
&neighbors_lo[j][i], &neighbors_hi[j][i]);
}
}
/*
* Linear interpolate with 8.8 fixed point.
@@ -2077,7 +2191,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
}
else if (util_format_fits_8unorm(bld.format_desc) &&
bld.format_desc->nr_channels > 1 &&
static_state->target == PIPE_TEXTURE_2D &&
(static_state->target == PIPE_TEXTURE_2D ||
static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
@@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
}
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
assert(LLVMTypeOf(val) == mask->int_vec_type);
mask->cond_mask = val;
mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
mask->cond_mask,
val,
"");
lp_exec_mask_update(mask);
}
@@ -802,7 +804,7 @@ emit_store(
case TGSI_FILE_PREDICATE:
lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->preds[index][chan_index]);
bld->preds[reg->Register.Index][chan_index]);
break;
default:
+4 -4
View File
@@ -128,16 +128,16 @@ struct lp_build_context
*/
struct lp_type type;
/** Same as lp_build_undef(type) */
/** Same as lp_build_elem_type(type) */
LLVMTypeRef elem_type;
/** Same as lp_build_undef(type) */
/** Same as lp_build_vec_type(type) */
LLVMTypeRef vec_type;
/** Same as lp_build_undef(type) */
/** Same as lp_build_int_elem_type(type) */
LLVMTypeRef int_elem_type;
/** Same as lp_build_undef(type) */
/** Same as lp_build_int_vec_type(type) */
LLVMTypeRef int_vec_type;
/** Same as lp_build_undef(type) */
+58
View File
@@ -0,0 +1,58 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_config.h"
#include "os_stream.h"
#include "util/u_memory.h"
#include "util/u_string.h"
/**
 * Generic vprintf for streams that only implement write().
 *
 * The text is first formatted into a 1024-byte stack buffer using a copy
 * of ap.  If it does not fit, a heap buffer of the reported length is
 * allocated and the text is formatted again with ap itself.  Non-empty
 * output is passed to stream->write().
 *
 * Returns the value reported by util_vsnprintf(), or -1 when the heap
 * buffer cannot be allocated.
 */
int
os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
{
   char stack_buf[1024];
   char *heap_buf;
   va_list probe;
   int len;

   /* format with a copy so ap stays usable for a second pass */
   va_copy(probe, ap);
   len = util_vsnprintf(stack_buf, sizeof(stack_buf), format, probe);
   va_end(probe);

   /* error or empty output: nothing to write */
   if (len <= 0)
      return len;

   /* common case: everything fit in the stack buffer */
   if (len < sizeof(stack_buf)) {
      stream->write(stream, stack_buf, len);
      return len;
   }

   /* too long: retry with a buffer of exactly the required size */
   heap_buf = MALLOC(len + 1);
   if (!heap_buf)
      return -1;

   len = util_vsnprintf(heap_buf, len + 1, format, ap);
   if (len > 0)
      stream->write(stream, heap_buf, len);
   FREE(heap_buf);

   return len;
}
+24 -1
View File
@@ -50,6 +50,9 @@ struct os_stream
void
(*flush)(struct os_stream *stream);
int
(*vprintf)(struct os_stream *stream, const char* format, va_list ap);
};
@@ -90,6 +93,27 @@ os_stream_flush(struct os_stream *stream)
stream->flush(stream);
}
int
os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap);
/** Forward a va_list style formatted write to the stream's vprintf hook. */
static INLINE int
os_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
{
   int written = stream->vprintf(stream, format, ap);

   return written;
}
/**
 * printf-style convenience wrapper: packs the variadic arguments into a
 * va_list and hands them to the stream's vprintf hook.
 */
static INLINE int
os_stream_printf (struct os_stream* stream, const char *format, ...)
{
   va_list ap;
   int written;

   va_start (ap, format);
   written = stream->vprintf(stream, format, ap);
   va_end (ap);

   return written;
}
struct os_stream *
os_file_stream_create(const char *filename);
@@ -118,5 +142,4 @@ os_str_stream_get_and_close(struct os_stream *stream);
#define os_file_stream_create(_filename) os_null_stream_create()
#endif
#endif /* _OS_STREAM_H_ */
+2 -1
View File
@@ -73,7 +73,8 @@ static struct os_stream
os_log_stream_struct = {
&os_log_stream_close,
&os_log_stream_write,
&os_log_stream_flush
&os_log_stream_flush,
&os_default_stream_vprintf,
};
+7 -1
View File
@@ -56,12 +56,18 @@ os_null_stream_flush(struct os_stream *stream)
(void)stream;
}
/** vprintf hook of the null stream: discards everything and reports 0. */
static int
os_null_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
{
   /* All arguments are intentionally ignored. */
   (void)stream;
   (void)format;
   (void)ap;
   return 0;
}
static struct os_stream
os_null_stream = {
&os_null_stream_close,
&os_null_stream_write,
&os_null_stream_flush
&os_null_stream_flush,
&os_null_stream_vprintf
};
@@ -83,6 +83,14 @@ os_stdc_stream_flush(struct os_stream *_stream)
fflush(stream->file);
}
static int
os_stdc_stream_vprintf (struct os_stream* _stream, const char *format, va_list ap)
{
struct os_stdc_stream *stream = os_stdc_stream(_stream);
return vfprintf(stream->file, format, ap);
}
struct os_stream *
os_file_stream_create(const char *filename)
@@ -96,6 +104,7 @@ os_file_stream_create(const char *filename)
stream->base.close = &os_stdc_stream_close;
stream->base.write = &os_stdc_stream_write;
stream->base.flush = &os_stdc_stream_flush;
stream->base.vprintf = &os_stdc_stream_vprintf;
stream->file = fopen(filename, "w");
if(!stream->file)
+1
View File
@@ -118,6 +118,7 @@ os_str_stream_create(size_t size)
stream->base.close = &os_str_stream_close;
stream->base.write = &os_str_stream_write;
stream->base.flush = &os_str_stream_flush;
stream->base.vprintf = &os_default_stream_vprintf;
stream->str = os_malloc(size);
if(!stream->str)
+1 -2
View File
@@ -50,8 +50,7 @@
#define PB_BUFMGR_H_
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
#include "pb_buffer.h"
#ifdef __cplusplus
+3 -3
View File
@@ -30,7 +30,7 @@
#include "rtasm_cpu.h"
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static boolean rtasm_sse_enabled(void)
{
static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
+470 -29
View File
@@ -22,8 +22,9 @@
**************************************************************************/
#include "pipe/p_config.h"
#include "util/u_cpu_detect.h"
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
assert(reg.mod == mod_REG);
/* TODO: support extended x86-64 registers */
assert(reg.idx < 8);
assert(regmem.idx < 8);
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
*/
/**
 * Emit the 0x48 prefix byte (REX.W) on 64-bit targets; emits nothing when
 * the target is X86_32.
 */
void x64_rexw(struct x86_function *p)
{
   if (x86_target(p) == X86_32)
      return;

   emit_1ub(p, 0x48);
}
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
/**
 * Store a 32-bit immediate into a register or memory operand.
 * Register destinations are delegated to x86_mov_reg_imm(); anything else
 * is encoded as opcode C7 /0 followed by the immediate.
 */
void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );

   if (dst.mod == mod_REG) {
      x86_mov_reg_imm(p, dst, imm);
      return;
   }

   emit_1ub(p, 0xc7);
   emit_modrm_noreg(p, 0, dst);
   emit_1i(p, imm);
}
/**
 * Store a 16-bit immediate into a register or memory operand.
 * The 0x66 operand-size prefix is always emitted first; the immediate is
 * written low byte first.
 */
void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
{
   DUMP_RI( dst, imm );
   emit_1ub(p, 0x66);

   if (dst.mod != mod_REG) {
      /* memory form: C7 /0 */
      emit_1ub(p, 0xc7);
      emit_modrm_noreg(p, 0, dst);
   }
   else {
      /* register form: B8+r */
      emit_1ub(p, 0xb8 + dst.idx);
   }

   /* both forms end with the little-endian 16-bit immediate */
   emit_2ub(p, imm & 0xff, imm >> 8);
}
/**
 * Store an 8-bit immediate into a register (B0+r) or memory operand (C6 /0).
 */
void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
{
   DUMP_RI( dst, imm );

   if (dst.mod != mod_REG) {
      emit_1ub(p, 0xc6);
      emit_modrm_noreg(p, 0, dst);
   }
   else {
      emit_1ub(p, 0xb0 + dst.idx);
   }

   /* the immediate byte follows in either form */
   emit_1ub(p, imm);
}
/**
* Immediate group 1 instructions.
*/
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
}
p->stack_offset += 4;
p->stack_offset += sizeof(void*);
}
void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
emit_1ub(p, 0x68);
emit_1i(p, imm32);
p->stack_offset += 4;
p->stack_offset += sizeof(void*);
}
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
p->stack_offset -= 4;
p->stack_offset -= sizeof(void*);
}
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x40 + reg.idx);
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
{
emit_1ub(p, 0x40 + reg.idx);
return;
}
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 0, reg);
}
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x48 + reg.idx);
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
{
emit_1ub(p, 0x48 + reg.idx);
return;
}
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 1, reg);
}
void x86_ret( struct x86_function *p )
@@ -583,9 +650,82 @@ void x86_mov( struct x86_function *p,
struct x86_reg src )
{
DUMP_RR( dst, src );
/* special hack for reading arguments until we support x86-64 registers everywhere */
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
{
uint8_t rex = 0x40;
if(dst.idx >= 8)
{
rex |= 4;
dst.idx -= 8;
}
if(src.idx >= 8)
{
rex |= 1;
src.idx -= 8;
}
emit_1ub(p, rex);
}
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
/**
 * 16-bit move: the 0x66 operand-size prefix followed by the same 8B/89
 * opcode pair that the 32-bit move uses.
 */
void x86_mov16( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   const unsigned char opsize_prefix = 0x66;

   DUMP_RR( dst, src );
   emit_1ub(p, opsize_prefix);
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
/** 8-bit move, encoded with the 8A (load) / 88 (store) opcode pair. */
void x86_mov8( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   const unsigned char op_load = 0x8a;
   const unsigned char op_store = 0x88;

   DUMP_RR( dst, src );
   emit_op_modrm( p, op_load, op_store, dst, src );
}
/**
 * 64-bit move.  A REX prefix with the W bit (0x48) is always emitted.
 *
 * For register-to-register moves, indices of 8 and above are folded back to
 * 0-7 and the matching REX extension bit is set instead (4 for dst, 1 for
 * src).  Must not be used when the target is X86_32.
 */
void x64_mov64( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   uint8_t rex = 0x48;
   DUMP_RR( dst, src );
   assert(x86_target(p) != X86_32);

   /* special hack for reading arguments until we support x86-64 registers everywhere */
   if (src.mod == mod_REG && dst.mod == mod_REG) {
      if (dst.idx >= 8) {
         dst.idx -= 8;
         rex |= 4;
      }
      if (src.idx >= 8) {
         src.idx -= 8;
         rex |= 1;
      }
   }

   emit_1ub(p, rex);
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
/** MOVZX with an 8-bit source: opcode bytes 0F B6 followed by ModRM. */
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0xb6;

   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, opcode);
   emit_modrm(p, dst, src);
}
/** MOVZX with a 16-bit source: opcode bytes 0F B7 followed by ModRM. */
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0xb7;

   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, opcode);
   emit_modrm(p, dst, src);
}
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -680,6 +820,61 @@ void x86_div( struct x86_function *p,
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
/**
 * Byte-swap a 32-bit general purpose register (0F C8+r).
 * Only plain registers of file_REG32 are accepted.
 */
void x86_bswap( struct x86_function *p, struct x86_reg reg )
{
   const unsigned char opcode_base = 0xc8;

   DUMP_R(reg);
   assert(reg.file == file_REG32);
   assert(reg.mod == mod_REG);
   emit_2ub(p, 0x0f, opcode_base + reg.idx);
}
/**
 * Logical shift right by an immediate count (ModRM extension /5).
 * A count of one uses the short D1 form; any other count uses C1 followed
 * by the count as a single byte.
 */
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);
   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 5, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
/**
 * Arithmetic shift right by an immediate count (ModRM extension /7).
 * A count of one uses the short D1 form; any other count uses C1 followed
 * by the count as a single byte.
 */
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);
   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 7, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
/**
 * Shift left by an immediate count (ModRM extension /4).
 * A count of one uses the short D1 form; any other count uses C1 followed
 * by the count as a single byte.
 */
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   const int by_one = (imm == 1);

   DUMP_RI(reg, imm);
   emit_1ub(p, by_one ? 0xd1 : 0xc1);
   emit_modrm_noreg(p, 4, reg);
   if (!by_one)
      emit_1ub(p, imm);
}
/***********************************************************************
@@ -1013,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
* SSE2 instructions
*/
/**
 * SSE2 MOVD (66 0F 6E / 66 0F 7E).
 *
 * When the destination is a plain 32-bit general purpose register the
 * store opcode (7E) is emitted with the operands swapped in the ModRM byte;
 * every other combination goes through emit_op_modrm().
 */
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);

   if (!(dst.mod == mod_REG && dst.file == file_REG32)) {
      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
      return;
   }

   emit_1ub(p, 0x7e);
   emit_modrm(p, src, dst);
}
/**
 * SSE2 MOVQ.
 *
 * Register destination: F3 0F 7E with (dst, src) in the ModRM byte.
 * Memory destination:   66 0F D6 with (src, dst); src must be a register.
 */
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);

   if (dst.mod == mod_REG) {
      emit_3ub(p, 0xf3, 0x0f, 0x7e);
      emit_modrm(p, dst, src);
   }
   else if (dst.mod == mod_INDIRECT ||
            dst.mod == mod_DISP32 ||
            dst.mod == mod_DISP8) {
      assert(src.mod == mod_REG);
      emit_3ub(p, 0x66, 0x0f, 0xd6);
      emit_modrm(p, src, dst);
   }
   else {
      assert(0);
   }
}
/** SSE2 MOVDQU: prefix F3 0F, opcode 6F (load) or 7F (store). */
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char op_load = 0x6f;
   const unsigned char op_store = 0x7f;

   DUMP_RR(dst, src);
   emit_2ub(p, 0xf3, 0x0f);
   emit_op_modrm(p, op_load, op_store, dst, src);
}
/** SSE2 MOVDQA: prefix 66 0F, opcode 6F (load) or 7F (store). */
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char op_load = 0x6f;
   const unsigned char op_store = 0x7f;

   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, op_load, op_store, dst, src);
}
/** SSE2 MOVSD: prefix F2 0F, opcode 10 (load) or 11 (store). */
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char op_load = 0x10;
   const unsigned char op_store = 0x11;

   DUMP_RR(dst, src);
   emit_2ub(p, 0xf2, 0x0f);
   emit_op_modrm(p, op_load, op_store, dst, src);
}
/** SSE2 MOVUPD: prefix 66 0F, opcode 10 (load) or 11 (store). */
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char op_load = 0x10;
   const unsigned char op_store = 0x11;

   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, op_load, op_store, dst, src);
}
/** SSE2 MOVAPD: prefix 66 0F, opcode 28 (load) or 29 (store). */
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char op_load = 0x28;
   const unsigned char op_store = 0x29;

   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, op_load, op_store, dst, src);
}
/**
* Perform a reduced swizzle:
*/
@@ -1027,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
/**
 * SSE2 PSHUFLW: F2 <two-byte escape> 70, ModRM, then the shuffle selector
 * as an immediate byte.
 */
void sse2_pshuflw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   const unsigned char prefix = 0xf2;
   const unsigned char opcode = 0x70;

   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, prefix, X86_TWOB, opcode);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}
/**
 * SSE2 PSHUFHW: F3 <two-byte escape> 70, ModRM, then the shuffle selector
 * as an immediate byte.
 */
void sse2_pshufhw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   const unsigned char prefix = 0xf3;
   const unsigned char opcode = 0x70;

   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, prefix, X86_TWOB, opcode);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1045,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
emit_modrm( p, dst, src );
}
/** SSE2 CVTSD2SS: F2 0F 5A followed by ModRM. */
void sse2_cvtsd2ss( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   const unsigned char prefix = 0xf2;
   const unsigned char opcode = 0x5a;

   DUMP_RR( dst, src );
   emit_3ub(p, prefix, 0x0f, opcode);
   emit_modrm( p, dst, src );
}
/** SSE2 CVTPD2PS: 66 0F 5A followed by ModRM. */
void sse2_cvtpd2ps( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   const unsigned char prefix = 0x66;
   const unsigned char opcode = 0x5a;

   DUMP_RR( dst, src );
   emit_3ub(p, prefix, 0x0f, opcode);
   emit_modrm( p, dst, src );
}
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1081,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
emit_modrm( p, dst, src );
}
/** SSE2 PUNPCKLWD: 66 0F 61 followed by ModRM. */
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0x61;

   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm( p, dst, src );
}
/** SSE2 PUNPCKLDQ: 66 0F 62 followed by ModRM. */
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0x62;

   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm( p, dst, src );
}
/** SSE2 PUNPCKLQDQ: 66 0F 6C followed by ModRM. */
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0x6c;

   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm( p, dst, src );
}
/** SSE2 PSLLW by immediate: 66 0F 71 /6, then the count as one byte. */
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x71;   /* word-sized shift group */
   const unsigned char ext = 6;         /* /6 selects shift left */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSLLD by immediate: 66 0F 72 /6, then the count as one byte. */
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x72;   /* dword-sized shift group */
   const unsigned char ext = 6;         /* /6 selects shift left */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSLLQ by immediate: 66 0F 73 /6, then the count as one byte. */
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x73;   /* qword-sized shift group */
   const unsigned char ext = 6;         /* /6 selects shift left */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSRLW by immediate: 66 0F 71 /2, then the count as one byte. */
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x71;   /* word-sized shift group */
   const unsigned char ext = 2;         /* /2 selects logical shift right */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSRLD by immediate: 66 0F 72 /2, then the count as one byte. */
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x72;   /* dword-sized shift group */
   const unsigned char ext = 2;         /* /2 selects logical shift right */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSRLQ by immediate: 66 0F 73 /2, then the count as one byte. */
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x73;   /* qword-sized shift group */
   const unsigned char ext = 2;         /* /2 selects logical shift right */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSRAW by immediate: 66 0F 71 /4, then the count as one byte. */
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x71;   /* word-sized shift group */
   const unsigned char ext = 4;         /* /4 selects arithmetic shift right */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 PSRAD by immediate: 66 0F 72 /4, then the count as one byte. */
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   const unsigned char opcode = 0x72;   /* dword-sized shift group */
   const unsigned char ext = 4;         /* /4 selects arithmetic shift right */

   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm_noreg(p, ext, dst);
   emit_1ub(p, imm);
}
/** SSE2 POR: 66 0F EB followed by ModRM. */
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   const unsigned char opcode = 0xeb;

   DUMP_RR(dst, src);
   emit_3ub(p, 0x66, 0x0f, opcode);
   emit_modrm(p, dst, src);
}
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
@@ -1100,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
emit_modrm( p, dst, src );
}
void sse2_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, 0x66, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}
/***********************************************************************
* x87 instructions
*/
@@ -1702,23 +2087,80 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
}
/* Retrieve a reference to one of the function arguments, taking into
* account any push/pop activity:
*/
struct x86_reg x86_fn_arg( struct x86_function *p,
unsigned arg )
unsigned arg )
{
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
switch(x86_target(p))
{
case X86_64_WIN64_ABI:
/* Microsoft uses a different calling convention than the rest of the world */
switch(arg)
{
case 1:
return x86_make_reg(file_REG32, reg_CX);
case 2:
return x86_make_reg(file_REG32, reg_DX);
case 3:
return x86_make_reg(file_REG32, reg_R8);
case 4:
return x86_make_reg(file_REG32, reg_R9);
default:
/* Win64 allocates stack slots as if it pushed the first 4 arguments too */
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 8);
}
case X86_64_STD_ABI:
switch(arg)
{
case 1:
return x86_make_reg(file_REG32, reg_DI);
case 2:
return x86_make_reg(file_REG32, reg_SI);
case 3:
return x86_make_reg(file_REG32, reg_DX);
case 4:
return x86_make_reg(file_REG32, reg_CX);
case 5:
return x86_make_reg(file_REG32, reg_R8);
case 6:
return x86_make_reg(file_REG32, reg_R9);
default:
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + (arg - 6) * 8); /* ??? */
}
case X86_32:
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
default:
abort();
}
}
/**
 * Initialisation shared by the x86_init_func*() entry points.
 *
 * Runs CPU detection, records the detected MMX/SSE feature bits in p->caps,
 * and rewinds the write cursor to the start of the code store.
 */
static void x86_init_func_common( struct x86_function *p )
{
   unsigned caps = 0;

   util_cpu_detect();

   /* Mirror each detected CPU feature into the matching X86_* flag. */
   caps |= util_cpu_caps.has_mmx    ? X86_MMX    : 0;
   caps |= util_cpu_caps.has_mmx2   ? X86_MMX2   : 0;
   caps |= util_cpu_caps.has_sse    ? X86_SSE    : 0;
   caps |= util_cpu_caps.has_sse2   ? X86_SSE2   : 0;
   caps |= util_cpu_caps.has_sse3   ? X86_SSE3   : 0;
   caps |= util_cpu_caps.has_sse4_1 ? X86_SSE4_1 : 0;
   p->caps = caps;

   p->csr = p->store;
   DUMP_START();
}
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
p->csr = p->store;
DUMP_START();
x86_init_func_common(p);
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1728,8 +2170,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
if (p->store == NULL) {
p->store = p->error_overflow;
}
p->csr = p->store;
DUMP_START();
x86_init_func_common(p);
}
void x86_release_func( struct x86_function *p )
+94 -6
View File
@@ -24,22 +24,31 @@
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
#include "pipe/p_compiler.h"
#include "pipe/p_config.h"
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
unsigned file:3;
unsigned idx:3;
unsigned file:2;
unsigned idx:4;
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
#define X86_MMX 1
#define X86_MMX2 2
#define X86_SSE 4
#define X86_SSE2 8
#define X86_SSE3 0x10
#define X86_SSE4_1 0x20
struct x86_function {
unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
@@ -75,7 +84,15 @@ enum x86_reg_name {
reg_SP,
reg_BP,
reg_SI,
reg_DI
reg_DI,
reg_R8,
reg_R9,
reg_R10,
reg_R11,
reg_R12,
reg_R13,
reg_R14,
reg_R15
};
@@ -110,6 +127,29 @@ typedef void (*x86_func)(void);
/* Begin/end/retrieve function creation:
*/
/* Code-generation targets, as reported by x86_target(). */
enum x86_target
{
/* 32-bit x86 */
X86_32,
/* x86-64, any build where _WIN64 is not defined */
X86_64_STD_ABI,
/* x86-64 built with _WIN64 defined */
X86_64_WIN64_ABI
};
/* make this read a member of x86_function if target != host is desired */
/*
 * Report the target the assembler generates code for.
 *
 * The answer is fixed at compile time from the architecture macros; the
 * function argument is not consulted.
 */
static INLINE enum x86_target x86_target( struct x86_function* p )
{
#ifdef PIPE_ARCH_X86
return X86_32;
#elif defined(_WIN64)
/* checked before the generic x86-64 case so that Win64 builds select it */
return X86_64_WIN64_ABI;
#elif defined(PIPE_ARCH_X86_64)
return X86_64_STD_ABI;
#endif
}
/** Return the X86_* capability flags stored in the function's caps field. */
static INLINE unsigned x86_target_caps( struct x86_function* p )
{
   const unsigned caps = p->caps;

   return caps;
}
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
@@ -138,6 +178,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
*/
int x86_get_label( struct x86_function *p );
void x64_rexw(struct x86_function *p);
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
@@ -178,18 +220,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -227,7 +305,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -237,6 +314,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -250,7 +335,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
void x86_bswap( struct x86_function *p, struct x86_reg src );
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
+1
View File
@@ -28,6 +28,7 @@
#ifndef TGSI_DUMP_H
#define TGSI_DUMP_H
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
+5
View File
@@ -3239,6 +3239,8 @@ exec_instruction(
if (mach->CallStackTop == 0) {
/* returning from main() */
mach->CondStackTop = 0;
mach->LoopStackTop = 0;
*pc = -1;
return;
}
@@ -3767,6 +3769,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
/* Strictly speaking, these assertions aren't really needed but they
* can potentially catch some bugs in the control flow code.
*/
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
+1
View File
@@ -28,6 +28,7 @@
#ifndef TGSI_INFO_H
#define TGSI_INFO_H
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
+16 -11
View File
@@ -282,17 +282,6 @@ tgsi_parse_token(
}
unsigned
tgsi_num_tokens(const struct tgsi_token *tokens)
{
struct tgsi_parse_context ctx;
if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
unsigned len = (ctx.FullHeader.Header.HeaderSize +
ctx.FullHeader.Header.BodySize);
return len;
}
return 0;
}
/**
@@ -319,3 +308,19 @@ tgsi_alloc_tokens(unsigned num_tokens)
unsigned bytes = num_tokens * sizeof(struct tgsi_token);
return (struct tgsi_token *) MALLOC(bytes);
}
/**
 * Print a token stream through debug_printf as a C array initialiser,
 * one hexadecimal dword per line.
 */
void
tgsi_dump_tokens(const struct tgsi_token *tokens)
{
   const unsigned *word = (const unsigned *)tokens;
   int count = tgsi_num_tokens(tokens);
   int remaining;

   /* The dump reinterprets the tokens as dwords, so the sizes must agree. */
   assert(sizeof(*tokens) == sizeof(unsigned));

   debug_printf("const unsigned tokens[%d] = {\n", count);
   for (remaining = count; remaining > 0; remaining--) {
      debug_printf("0x%08x,\n", *word);
      word++;
   }
   debug_printf("};\n");
}
+10 -2
View File
@@ -28,6 +28,7 @@
#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -132,8 +133,15 @@ void
tgsi_parse_token(
struct tgsi_parse_context *ctx );
unsigned
tgsi_num_tokens(const struct tgsi_token *tokens);
/**
 * Return the length of a token stream: the sum of the HeaderSize and
 * BodySize fields of the tgsi_header read from the first token.
 */
static INLINE unsigned
tgsi_num_tokens(const struct tgsi_token *tokens)
{
   const struct tgsi_header *hdr = (const struct tgsi_header *) tokens;

   return hdr->HeaderSize + hdr->BodySize;
}
void
tgsi_dump_tokens(const struct tgsi_token *tokens);
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);
+4 -1
View File
@@ -32,9 +32,12 @@
extern "C" {
#endif
#include "pipe/p_compiler.h"
struct tgsi_exec_machine;
struct tgsi_interp_coef;
struct tgsi_token;
struct x86_function;
struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
+1 -1
View File
@@ -38,7 +38,7 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
translate = translate_sse2_create( key );
if (translate)
return translate;
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run_elts16)( struct translate *,
const uint16_t *elts,
unsigned count,
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run_elts8)( struct translate *,
const uint8_t *elts,
unsigned count,
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
@@ -64,6 +64,14 @@ struct translate_generic {
unsigned input_stride;
unsigned max_index;
/* this value is set to -1 if this is a normal element with output_format != input_format:
* in this case, u_format is used to do a full conversion
*
* this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
* in this case, memcpy is used to copy this amount of bytes
*/
int copy_size;
} attrib[PIPE_MAX_ATTRIBS];
unsigned nr_attrib;
@@ -354,7 +362,65 @@ static emit_func get_emit_func( enum pipe_format format )
}
}
/**
 * Translate one vertex.
 *
 * For every attribute, the source is either element `elt` of the input
 * buffer (or `instance_id / instance_divisor` for per-instance attributes),
 * or the instance id itself.  The result is stored at the attribute's
 * output offset inside `vert`.
 *
 * A non-negative copy_size selects the raw memcpy path; a negative one
 * selects the fetch/emit conversion path.
 *
 * \param tg           translate object holding the per-attribute state
 * \param elt          vertex index used for non-instanced attributes
 * \param instance_id  current instance number
 * \param vert         start of the output vertex
 */
static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
                                                      unsigned elt,
                                                      unsigned instance_id,
                                                      void *vert )
{
   unsigned nr_attrs = tg->nr_attrib;
   unsigned attr;

   for (attr = 0; attr < nr_attrs; attr++) {
      float data[4];
      uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
      int copy_size = tg->attrib[attr].copy_size;

      if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
         const uint8_t *src;
         unsigned index;

         if (tg->attrib[attr].instance_divisor) {
            index = instance_id / tg->attrib[attr].instance_divisor;
         }
         else {
            index = elt;
         }

         /* clamp to avoid going out of bounds */
         index = MIN2(index, tg->attrib[attr].max_index);

         src = tg->attrib[attr].input_ptr +
               tg->attrib[attr].input_stride * index;

         if (likely(copy_size >= 0)) {
            /* raw copy, no format conversion */
            memcpy(dst, src, copy_size);
         }
         else {
            tg->attrib[attr].fetch( data, src, 0, 0 );

            if (0)
               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
                            " %f, %f, %f, %f \n",
                            attr,
                            tg->attrib[attr].input_ptr,
                            tg->attrib[attr].input_stride,
                            index,
                            data[0], data[1],data[2], data[3]);

            tg->attrib[attr].emit( data, dst );
         }
      } else {
         if (likely(copy_size >= 0)) {
            /* The raw 32-bit instance id must land in the output vertex.
             * Copying it into the local `data` scratch array would leave
             * the attribute unwritten.
             */
            memcpy(dst, &instance_id, 4);
         }
         else {
            data[0] = (float)instance_id;
            tg->attrib[attr].emit( data, dst );
         }
      }
   }
}
/**
* Fetch vertex attributes for 'count' vertices.
@@ -367,62 +433,45 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned nr_attrs = tg->nr_attrib;
unsigned attr;
unsigned i;
/* loop over vertex attributes (vertex shader inputs)
*/
for (i = 0; i < count; i++) {
const unsigned elt = *elts++;
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
char *dst = vert + tg->attrib[attr].output_offset;
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
unsigned index;
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
} else {
index = elt;
}
/* clamp to void going out of bounds */
index = MIN2(index, tg->attrib[attr].max_index);
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
tg->attrib[attr].fetch( data, src, 0, 0 );
if (0)
debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
" %f, %f, %f, %f \n",
attr,
tg->attrib[attr].input_ptr,
tg->attrib[attr].input_stride,
tg->attrib[attr].instance_divisor,
tg->attrib[attr].max_index,
index,
data[0], data[1],data[2], data[3]);
} else {
data[0] = (float)instance_id;
}
if (0)
debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
i, elt, attr, data[0], data[1], data[2], data[3]);
tg->attrib[attr].emit( data, dst );
}
generic_run_one(tg, *elts++, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
/**
 * Translate `count` vertices selected by a list of 16-bit indices, writing
 * them back to back into `output_buffer` at the key's output stride.
 */
static void PIPE_CDECL generic_run_elts16( struct translate *translate,
                                           const uint16_t *elts,
                                           unsigned count,
                                           unsigned instance_id,
                                           void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   char *out = output_buffer;
   unsigned remaining;

   for (remaining = count; remaining != 0; remaining--) {
      const unsigned elt = *elts;

      elts++;
      generic_run_one(tg, elt, instance_id, out);
      out += tg->translate.key.output_stride;
   }
}
/**
 * Translate `count` vertices selected by a list of 8-bit indices, writing
 * them back to back into `output_buffer` at the key's output stride.
 */
static void PIPE_CDECL generic_run_elts8( struct translate *translate,
                                          const uint8_t *elts,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   char *out = output_buffer;
   unsigned remaining;

   for (remaining = count; remaining != 0; remaining--) {
      const unsigned elt = *elts;

      elts++;
      generic_run_one(tg, elt, instance_id, out);
      out += tg->translate.key.output_stride;
   }
}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
@@ -432,57 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned nr_attrs = tg->nr_attrib;
unsigned attr;
unsigned i;
/* loop over vertex attributes (vertex shader inputs)
*/
for (i = 0; i < count; i++) {
unsigned elt = start + i;
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
char *dst = vert + tg->attrib[attr].output_offset;
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
unsigned index;
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
}
else {
index = elt;
}
/* clamp to void going out of bounds */
index = MIN2(index, tg->attrib[attr].max_index);
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
tg->attrib[attr].fetch( data, src, 0, 0 );
if (0)
debug_printf("Fetch linear attr %d from %p stride %d index %d: "
" %f, %f, %f, %f \n",
attr,
tg->attrib[attr].input_ptr,
tg->attrib[attr].input_stride,
index,
data[0], data[1],data[2], data[3]);
} else {
data[0] = (float)instance_id;
}
if (0)
debug_printf("vert %d attr %d: %f %f %f %f\n",
i, attr, data[0], data[1], data[2], data[3]);
tg->attrib[attr].emit( data, dst );
}
generic_run_one(tg, start + i, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -528,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
tg->translate.run_elts16 = generic_run_elts16;
tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
@@ -544,9 +548,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
tg->attrib[i].copy_size = -1;
if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
{
if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
|| key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
tg->attrib[i].copy_size = 4;
}
else
{
if(key->element[i].input_format == key->element[i].output_format
&& format_desc->block.width == 1
&& format_desc->block.height == 1
&& !(format_desc->block.bits & 7))
tg->attrib[i].copy_size = format_desc->block.bits >> 3;
}
if(tg->attrib[i].copy_size < 0)
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
else
tg->attrib[i].emit = NULL;
}
tg->nr_attrib = key->nr_elements;
File diff suppressed because it is too large Load Diff
+3
View File
@@ -36,6 +36,9 @@
#define U_HANDLE_BITMASK_H_
#include "pipe/p_compiler.h"
#ifdef __cplusplus
extern "C" {
#endif
+95 -35
View File
@@ -42,6 +42,7 @@
#include "util/u_blit.h"
#include "util/u_draw_quad.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sampler.h"
@@ -56,15 +57,18 @@ struct blit_state
struct cso_context *cso;
struct pipe_blend_state blend;
struct pipe_depth_stencil_alpha_state depthstencil;
struct pipe_depth_stencil_alpha_state depthstencil_keep;
struct pipe_depth_stencil_alpha_state depthstencil_write;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
struct pipe_vertex_element velem[2];
enum pipe_texture_target internal_target;
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
void *fs_depth;
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -95,7 +99,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->blend.rt[0].colormask = PIPE_MASK_RGBA;
/* no-op depth/stencil/alpha */
memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
memset(&ctx->depthstencil_keep, 0, sizeof(ctx->depthstencil_keep));
memset(&ctx->depthstencil_write, 0, sizeof(ctx->depthstencil_write));
ctx->depthstencil_write.depth.enabled = 1;
ctx->depthstencil_write.depth.writemask = 1;
ctx->depthstencil_write.depth.func = PIPE_FUNC_ALWAYS;
/* rasterizer */
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
@@ -110,7 +118,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
ctx->sampler.min_img_filter = 0; /* set later */
ctx->sampler.mag_img_filter = 0; /* set later */
ctx->sampler.normalized_coords = 1;
/* vertex elements state */
memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
@@ -145,6 +152,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->vertices[i][1][3] = 1.0f; /* q */
}
if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
ctx->internal_target = PIPE_TEXTURE_2D;
else
ctx->internal_target = PIPE_TEXTURE_RECT;
return ctx;
}
@@ -164,6 +176,9 @@ util_destroy_blit(struct blit_state *ctx)
if (ctx->fs[i])
pipe->delete_fs_state(pipe, ctx->fs[i]);
if (ctx->fs_depth)
pipe->delete_fs_state(pipe, ctx->fs_depth);
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
@@ -271,7 +286,7 @@ regions_overlap(int srcX0, int srcY0,
* \param writemask controls which channels in the dest surface are sourced
* from the src surface. Disabled channels are sourced
* from (0,0,0,1).
* XXX need some control over blitting Z and/or stencil.
* XXX need some control over blitting stencil.
*/
void
util_blit_pixels_writemask(struct blit_state *ctx,
@@ -294,8 +309,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
unsigned offset;
boolean overlap;
boolean overlap, dst_is_depth;
float s0, t0, s1, t1;
boolean normalized;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
@@ -335,7 +351,6 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
/* Create a temporary texture when src and dest alias or when src
* is anything other than a 2d texture.
* XXX should just use appropriate shader to access 1d / 3d slice / cube face,
@@ -347,7 +362,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
dst->face == srcsub.face &&
dst->level == srcsub.level &&
dst->zslice == srcZ0) ||
src_tex->target != PIPE_TEXTURE_2D)
(src_tex->target != PIPE_TEXTURE_2D &&
src_tex->target != PIPE_TEXTURE_RECT))
{
struct pipe_resource texTemp;
struct pipe_resource *tex;
@@ -372,7 +388,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* create temp texture */
memset(&texTemp, 0, sizeof(texTemp));
texTemp.target = PIPE_TEXTURE_2D;
texTemp.target = ctx->internal_target;
texTemp.format = src_tex->format;
texTemp.last_level = 0;
texTemp.width0 = srcW;
@@ -392,10 +408,19 @@ util_blit_pixels_writemask(struct blit_state *ctx,
src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */
srcW, srcH); /* size */
s0 = 0.0f;
s1 = 1.0f;
t0 = 0.0f;
t1 = 1.0f;
normalized = tex->target != PIPE_TEXTURE_RECT;
if(normalized) {
s0 = 0.0f;
s1 = 1.0f;
t0 = 0.0f;
t1 = 1.0f;
}
else {
s0 = 0;
s1 = srcW;
t0 = 0;
t1 = srcH;
}
u_sampler_view_default_template(&sv_templ, tex, tex->format);
sampler_view = pipe->create_sampler_view(pipe, tex, &sv_templ);
@@ -415,20 +440,29 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
s0 = srcX0 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
s1 = srcX1 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
t0 = srcY0 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
t1 = srcY1 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
s0 = srcX0;
s1 = srcX1;
t0 = srcY0;
t1 = srcY1;
normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT;
if(normalized)
{
s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
}
}
dst_is_depth = util_format_is_depth_or_stencil(dst->format);
assert(screen->is_format_supported(screen, sampler_view->format, PIPE_TEXTURE_2D,
assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
sampler_view->texture->nr_samples,
PIPE_BIND_SAMPLER_VIEW, 0));
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
dst->texture->nr_samples,
PIPE_BIND_RENDER_TARGET, 0));
dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
PIPE_BIND_RENDER_TARGET, 0));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -444,12 +478,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_depth_stencil_alpha(ctx->cso,
dst_is_depth ? &ctx->depthstencil_write :
&ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
/* we've limited this already with the sampler view but you never know... */
@@ -472,22 +509,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* texture */
cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
if (ctx->fs[writemask] == NULL)
ctx->fs[writemask] =
util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
TGSI_INTERPOLATE_LINEAR,
writemask);
/* shaders */
cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
if (dst_is_depth) {
if (ctx->fs_depth == NULL)
ctx->fs_depth =
util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
TGSI_INTERPOLATE_LINEAR);
cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
} else {
if (ctx->fs[writemask] == NULL)
ctx->fs[writemask] =
util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
TGSI_INTERPOLATE_LINEAR,
writemask);
cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
}
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
if (dst_is_depth) {
fb.zsbuf = dst;
} else {
fb.nr_cbufs = 1;
fb.cbufs[0] = dst;
}
cso_set_framebuffer(ctx->cso, &fb);
/* draw quad */
@@ -574,6 +624,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
int dstX1, int dstY1,
float z, uint filter)
{
boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT;
struct pipe_framebuffer_state fb;
float s0, t0, s1, t1;
unsigned offset;
@@ -586,10 +637,18 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(tex->width0 != 0);
assert(tex->height0 != 0);
s0 = srcX0 / (float)tex->width0;
s1 = srcX1 / (float)tex->width0;
t0 = srcY0 / (float)tex->height0;
t1 = srcY1 / (float)tex->height0;
s0 = srcX0;
s1 = srcX1;
t0 = srcY0;
t1 = srcY1;
if(normalized)
{
s0 /= (float)tex->width0;
s1 /= (float)tex->width0;
t0 /= (float)tex->height0;
t1 /= (float)tex->height0;
}
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
@@ -611,12 +670,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
+8 -6
View File
@@ -30,18 +30,20 @@
#define U_BLIT_H
#include "pipe/p_compiler.h"
#ifdef __cplusplus
extern "C" {
#endif
struct pipe_context;
struct pipe_surface;
struct pipe_resource;
struct cso_context;
struct blit_state;
struct pipe_context;
struct pipe_resource;
struct pipe_sampler_view;
struct pipe_subresource;
struct pipe_surface;
extern struct blit_state *
+36 -18
View File
@@ -92,7 +92,7 @@ struct blitter_context_priv
void *velem_state;
/* Sampler state for clamping to a miplevel. */
void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
void *sampler_state[PIPE_MAX_TEXTURE_LEVELS * 2];
/* Rasterizer state. */
void *rs_state;
@@ -254,6 +254,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
ctx->dsa_write_depth_keep_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
@@ -271,7 +272,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS * 2; i++)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
@@ -319,7 +320,7 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
*/
if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
ctx->base.saved_fb_state.nr_cbufs = ~0;
}
@@ -417,16 +418,26 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
}
}
static void get_normalized_texcoords(struct pipe_resource *src,
static void get_texcoords(struct pipe_resource *src,
struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
float out[4])
boolean normalized, float out[4])
{
out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
if(normalized)
{
out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
}
else
{
out[0] = x1;
out[1] = y1;
out[2] = x2;
out[3] = y2;
}
}
static void set_texcoords_in_vertices(const float coord[4],
@@ -454,7 +465,7 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
unsigned i;
float coord[4];
get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
@@ -489,7 +500,7 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
float coord[4];
float st[4][2];
get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &st[0][0], 2);
util_map_texcoords2d_onto_cubemap(subsrc.face,
@@ -523,7 +534,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
int miplevel)
int miplevel, boolean normalized)
{
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
@@ -531,18 +542,19 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
/* Create the sampler state on-demand. */
if (!ctx->sampler_state[miplevel]) {
if (!ctx->sampler_state[miplevel * 2 + normalized]) {
sampler_state->lod_bias = miplevel;
sampler_state->min_lod = miplevel;
sampler_state->max_lod = miplevel;
sampler_state->normalized_coords = normalized;
ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
ctx->sampler_state[miplevel * 2 + normalized] = pipe->create_sampler_state(pipe,
sampler_state);
}
/* Return void** so that it can be passed to bind_fragment_sampler_states
* directly. */
return &ctx->sampler_state[miplevel];
return &ctx->sampler_state[miplevel * 2 + normalized];
}
static INLINE
@@ -568,6 +580,8 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
return TGSI_TEXTURE_1D;
case PIPE_TEXTURE_2D:
return TGSI_TEXTURE_2D;
case PIPE_TEXTURE_RECT:
return TGSI_TEXTURE_RECT;
case PIPE_TEXTURE_3D:
return TGSI_TEXTURE_3D;
case PIPE_TEXTURE_CUBE:
@@ -716,6 +730,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
struct pipe_sampler_view viewTempl, *view;
unsigned bind;
boolean is_stencil, is_depth;
boolean normalized;
/* Give up if textures are not set. */
assert(dst && src);
@@ -787,6 +802,8 @@ void util_blitter_copy_region(struct blitter_context *blitter,
fb_state.zsbuf = 0;
}
normalized = src->target != PIPE_TEXTURE_RECT;
/* Initialize sampler view. */
u_sampler_view_default_template(&viewTempl, src, src->format);
view = pipe->create_sampler_view(pipe, src, &viewTempl);
@@ -795,7 +812,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
blitter_get_sampler_state(ctx, subsrc.level));
blitter_get_sampler_state(ctx, subsrc.level, normalized));
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
@@ -806,11 +823,12 @@ void util_blitter_copy_region(struct blitter_context *blitter,
/* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
{
/* Set texture coordinates. */
float coord[4];
get_normalized_texcoords(src, subsrc, srcx, srcy,
srcx+width, srcy+height, coord);
get_texcoords(src, subsrc, srcx, srcy,
srcx+width, srcy+height, normalized, coord);
/* Draw. */
blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+2 -34
View File
@@ -27,6 +27,7 @@
#ifndef U_BLITTER_H
#define U_BLITTER_H
#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
@@ -258,45 +259,12 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
blitter->saved_vs = vs;
}
/* XXX This should probably be moved elsewhere. */
/**
 * Make \p dst hold the same surfaces as \p src, going through
 * pipe_surface_reference() for every slot that changes.
 *
 * With a NULL \p src, every colour buffer slot below dst->nr_cbufs and the
 * depth/stencil slot are set to NULL and nr_cbufs becomes 0; width and
 * height are left untouched in that case.
 */
static INLINE
void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst,
                                   const struct pipe_framebuffer_state *src)
{
   unsigned slot = 0;

   if (!src) {
      /* Set all surfaces to NULL. */
      while (slot < dst->nr_cbufs) {
         pipe_surface_reference(&dst->cbufs[slot], NULL);
         slot++;
      }
      pipe_surface_reference(&dst->zsbuf, NULL);

      dst->nr_cbufs = 0;
      return;
   }

   /* Take over every colour buffer the source has... */
   while (slot < src->nr_cbufs) {
      pipe_surface_reference(&dst->cbufs[slot], src->cbufs[slot]);
      slot++;
   }
   /* ...and clear whatever the destination held beyond that. */
   while (slot < dst->nr_cbufs) {
      pipe_surface_reference(&dst->cbufs[slot], NULL);
      slot++;
   }

   pipe_surface_reference(&dst->zsbuf, src->zsbuf);

   dst->nr_cbufs = src->nr_cbufs;
   dst->width = src->width;
   dst->height = src->height;
}
static INLINE
void util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
{
blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
util_assign_framebuffer_state(&blitter->saved_fb_state, state);
util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
static INLINE
+3 -174
View File
@@ -73,7 +73,9 @@
#endif
#ifdef DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
#endif
struct util_cpu_caps util_cpu_caps;
@@ -83,61 +85,6 @@ static int has_cpuid(void);
#endif
#if defined(PIPE_ARCH_X86)
/* The sigill handlers */
#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
/*
 * SIGILL handler used while probing for OS-level SSE support.
 *
 * Bumps the saved instruction pointer by 3 and clears
 * util_cpu_caps.has_sse.
 *
 * NOTE(review): sc is received by value, so the eip adjustment only has an
 * effect if this argument aliases the context the kernel restores on
 * return -- confirm against the signal ABI this was written for.
 */
static void
sigill_handler_sse(int signal, struct sigcontext sc)
{
/* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
* instructions are 3 bytes long. We must increment the instruction
* pointer manually to avoid repeated execution of the offending
* instruction.
*
* If the SIGILL is caused by a divide-by-zero when unmasked
* exceptions aren't supported, the SIMD FPU status and control
* word will be restored at the end of the test, so we don't need
* to worry about doing it here. Besides, we may not be able to...
*/
sc.eip += 3;
util_cpu_caps.has_sse=0;
}
/*
 * SIGFPE handler used while probing for OS-level SSE support.
 *
 * When the saved FPU state's magic field is not 0xffff, sets bit 9
 * (0x200) and clears bit 2 (0x4) of the saved mxcsr; otherwise does
 * nothing.
 *
 * NOTE(review): sc is received by value; whether the mxcsr edits reach the
 * restored context depends on the signal ABI -- confirm.
 */
static void
sigfpe_handler_sse(int signal, struct sigcontext sc)
{
if (sc.fpstate->magic != 0xffff) {
/* Our signal context has the extended FPU state, so reset the
* divide-by-zero exception mask and clear the divide-by-zero
* exception bit.
*/
sc.fpstate->mxcsr |= 0x00000200;
sc.fpstate->mxcsr &= 0xfffffffb;
} else {
/* If we ever get here, we're completely hosed.
*/
}
}
#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
#if defined(PIPE_OS_WINDOWS)
/*
 * Exception filter installed while probing for SSE on Windows.
 *
 * An illegal-instruction exception is answered by advancing Eip by 3,
 * clearing util_cpu_caps.has_sse and resuming execution; every other
 * exception code is passed on to the next handler.
 */
static LONG CALLBACK
win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
{
   if (ep->ExceptionRecord->ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION)
      return EXCEPTION_CONTINUE_SEARCH;

   ep->ContextRecord->Eip += 3;
   util_cpu_caps.has_sse = 0;
   return EXCEPTION_CONTINUE_EXECUTION;
}
#endif /* PIPE_OS_WINDOWS */
#endif /* PIPE_ARCH_X86 */
#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -194,123 +141,8 @@ check_os_altivec_support(void)
}
#endif /* PIPE_ARCH_PPC */
/* If we're running on a processor that can do SSE, let's see if we
* are allowed to or not. This will catch 2.4.0 or later kernels that
* haven't been configured for a Pentium III but are running on one,
* and RedHat patched 2.2 kernels that have broken exception handling
* support for user space apps that do SSE.
*/
#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
static void
check_os_katmai_support(void)
{
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_OS_FREEBSD)
int has_sse=0, ret;
int len = sizeof (has_sse);
ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
if (ret || !has_sse)
util_cpu_caps.has_sse=0;
#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
int has_sse, has_sse2, ret, mib[2];
int varlen;
mib[0] = CTL_MACHDEP;
mib[1] = CPU_SSE;
varlen = sizeof (has_sse);
ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
if (ret < 0 || !has_sse) {
util_cpu_caps.has_sse = 0;
} else {
util_cpu_caps.has_sse = 1;
}
mib[1] = CPU_SSE2;
varlen = sizeof (has_sse2);
ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
if (ret < 0 || !has_sse2) {
util_cpu_caps.has_sse2 = 0;
} else {
util_cpu_caps.has_sse2 = 1;
}
util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
#elif defined(PIPE_OS_WINDOWS)
LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
if (util_cpu_caps.has_sse) {
exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
#if defined(PIPE_CC_GCC)
__asm __volatile ("xorps %xmm0, %xmm0");
#elif defined(PIPE_CC_MSVC)
__asm {
xorps xmm0, xmm0 /* executing SSE instruction */
}
#else
#error Unsupported compiler
#endif
SetUnhandledExceptionFilter(exc_fil);
}
#elif defined(PIPE_OS_LINUX)
struct sigaction saved_sigill;
struct sigaction saved_sigfpe;
/* Save the original signal handlers.
*/
sigaction(SIGILL, NULL, &saved_sigill);
sigaction(SIGFPE, NULL, &saved_sigfpe);
signal(SIGILL, (void (*)(int))sigill_handler_sse);
signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
/* Emulate test for OSFXSR in CR4. The OS will set this bit if it
* supports the extended FPU save and restore required for SSE. If
* we execute an SSE instruction on a PIII and get a SIGILL, the OS
* doesn't support Streaming SIMD Exceptions, even if the processor
* does.
*/
if (util_cpu_caps.has_sse) {
__asm __volatile ("xorps %xmm1, %xmm0");
}
/* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
* it supports unmasked SIMD FPU exceptions. If we unmask the
* exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
* doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
* as expected, we're okay but we need to clean up after it.
*
* Are we being too stringent in our requirement that the OS support
* unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
* setting CR4.OSFXSR but don't support unmasked exceptions. Win98
* doesn't even support them. We at least know the user-space SSE
* support is good in kernels that do support unmasked exceptions,
* and therefore to be safe I'm going to leave this test in here.
*/
if (util_cpu_caps.has_sse) {
/* test_os_katmai_exception_support(); */
}
/* Restore the original signal handlers.
*/
sigaction(SIGILL, &saved_sigill, NULL);
sigaction(SIGFPE, &saved_sigfpe, NULL);
#else
/* We can't use POSIX signal handling to test the availability of
* SSE, so we disable it by default.
*/
util_cpu_caps.has_sse = 0;
#endif /* __linux__ */
#endif
#if defined(PIPE_ARCH_X86_64)
util_cpu_caps.has_sse = 1;
#endif
}
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
@@ -469,9 +301,6 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = regs2[2] & 0xFF;
}
if (util_cpu_caps.has_sse)
check_os_katmai_support();
if (!util_cpu_caps.has_sse) {
util_cpu_caps.has_sse2 = 0;
util_cpu_caps.has_sse3 = 0;
@@ -0,0 +1,81 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <pipe/p_state.h>
#include <util/u_format.h>
#include <util/u_debug_describe.h>
#include <util/u_string.h>
/**
 * Describe an object about which nothing more specific is known.
 *
 * Always writes the NUL-terminated string "pipe_object" into \p buf;
 * \p ptr is not inspected.
 */
void
debug_describe_reference(char* buf, const struct pipe_reference*ptr)
{
   static const char generic_name[] = "pipe_object";

   /* sizeof covers the terminating NUL as well. */
   memcpy(buf, generic_name, sizeof generic_name);
}
/**
 * Write a one-line description of a resource into \p buf.
 *
 * The text depends on ptr->target: buffers report the value returned by
 * util_format_get_stride() for their format and width0, textures report
 * their dimensions, short format name and (except for rectangle textures)
 * last mip level.  Unknown targets are printed by numeric value.
 *
 * No buffer size is passed in; the caller must supply a large enough
 * buffer.
 */
void
debug_describe_resource(char* buf, const struct pipe_resource *ptr)
{
   switch(ptr->target)
   {
   case PIPE_BUFFER:
      util_sprintf(buf, "pipe_buffer<%u>",
                   (unsigned)util_format_get_stride(ptr->format, ptr->width0));
      return;

   case PIPE_TEXTURE_1D:
      util_sprintf(buf, "pipe_texture1d<%u,%s,%u>",
                   ptr->width0,
                   util_format_short_name(ptr->format),
                   ptr->last_level);
      return;

   case PIPE_TEXTURE_2D:
      util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>",
                   ptr->width0, ptr->height0,
                   util_format_short_name(ptr->format),
                   ptr->last_level);
      return;

   case PIPE_TEXTURE_RECT:
      util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>",
                   ptr->width0, ptr->height0,
                   util_format_short_name(ptr->format));
      return;

   case PIPE_TEXTURE_CUBE:
      util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>",
                   ptr->width0, ptr->height0,
                   util_format_short_name(ptr->format),
                   ptr->last_level);
      return;

   case PIPE_TEXTURE_3D:
      util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>",
                   ptr->width0, ptr->height0, ptr->depth0,
                   util_format_short_name(ptr->format),
                   ptr->last_level);
      return;

   default:
      /* Unrecognised target: print its raw value. */
      util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target);
      return;
   }
}
/**
 * Write a description of a surface into \p buf: the description of its
 * texture followed by the surface's face, level and zslice.
 */
void
debug_describe_surface(char* buf, const struct pipe_surface *ptr)
{
   char texture_desc[128];

   debug_describe_resource(texture_desc, ptr->texture);

   util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>",
                texture_desc, ptr->face, ptr->level, ptr->zslice);
}
/**
 * Write a description of a sampler view into \p buf: the description of
 * its texture followed by the short name of the view's own format.
 */
void
debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
{
   char texture_desc[128];
   const char *view_format;

   debug_describe_resource(texture_desc, ptr->texture);
   view_format = util_format_short_name(ptr->format);

   util_sprintf(buf, "pipe_sampler_view<%s,%s>", texture_desc, view_format);
}
@@ -0,0 +1,49 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef U_DEBUG_DESCRIBE_H_
#define U_DEBUG_DESCRIBE_H_
#ifdef __cplusplus
extern "C" {
#endif
struct pipe_reference;
struct pipe_resource;
struct pipe_surface;
struct pipe_sampler_view;
/* a 256-byte buffer is necessary and sufficient */
void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
#ifdef __cplusplus
}
#endif
#endif /* U_DEBUG_DESCRIBE_H_ */
+181
View File
@@ -0,0 +1,181 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
/* see http://www.mozilla.org/performance/refcnt-balancer.html for what to do with the output;
* on Linux, use tools/addr2line.sh to postprocess it before anything else
**/
#include <util/u_debug.h>
#include <util/u_debug_refcnt.h>
#include <util/u_debug_stack.h>
#include <util/u_debug_symbol.h>
#include <util/u_string.h>
#include <util/u_hash_table.h>
#include <os/os_thread.h>
#include <os/os_stream.h>
/* Tri-state logging switch read by the inline debug_reference() fast path:
 * 0 = not initialised yet, > 0 = logging to `stream`, < 0 = logging disabled.
 * It is declared extern in u_debug_refcnt.h, so it keeps external linkage.
 */
int debug_refcnt_state;

/* Log destination opened lazily by debug_reference_slowpath().  It is private
 * to this file, so keep it out of the global namespace.
 */
static struct os_stream* stream;

/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
/* serials_mutex guards serials_hash and serials_last. */
static pipe_mutex serials_mutex;
/* Maps an object address to the serial number handed out for it. */
static struct util_hash_table* serials_hash;
/* Last serial number handed out; 0 means "none yet". */
static unsigned serials_last;
/* Hash callback for the serial table: the pointer value truncated to unsigned. */
static unsigned hash_ptr(void* p)
{
   uintptr_t bits = (uintptr_t)p;

   return (unsigned)bits;
}
/* Key comparison callback for the serial table.
 * Returns 0 when both keys are the same address, -1 when a is the lower
 * address and 1 otherwise.
 */
static int compare_ptr(void* a, void* b)
{
   if (a < b)
      return -1;

   return (a == b) ? 0 : 1;
}
static boolean debug_serial(void* p, unsigned* pserial)
{
unsigned serial;
boolean found = TRUE;
pipe_mutex_lock(serials_mutex);
if(!serials_hash)
serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
serial = (unsigned)(uintptr_t)util_hash_table_get(serials_hash, p);
if(!serial)
{
/* time to stop logging... (you'll have a 100 GB logfile at least at this point)
* TODO: avoid this
*/
serial = ++serials_last;
if(!serial)
{
debug_error("More than 2^32 objects detected, aborting.\n");
os_abort();
}
util_hash_table_set(serials_hash, p, (void*)(uintptr_t)serial);
found = FALSE;
}
pipe_mutex_unlock(serials_mutex);
*pserial = serial;
return found;
}
/* Drop the serial number recorded for object address p, so that a later
 * debug_serial() call on the same address allocates a new serial.
 */
static void debug_serial_delete(void* p)
{
   pipe_mutex_lock(serials_mutex);

   util_hash_table_remove(serials_hash, p);

   pipe_mutex_unlock(serials_mutex);
}
#define STACK_LEN 64
/* Write every non-NULL entry of symbols[] to the log stream, one per line,
 * followed by an empty line that terminates the stack dump.
 */
static void dump_stack(const char* symbols[STACK_LEN])
{
   unsigned frame = 0;

   while (frame < STACK_LEN)
   {
      const char* name = symbols[frame++];

      if (name != NULL)
         os_stream_printf(stream, "%s\n", name);
   }

   os_stream_write(stream, "\n", 1);
}
/* Log one reference-count event for object p together with a backtrace.
 *
 * p        - reference whose count has already been updated by the caller
 * get_desc - callback that writes a textual description of p into a buffer
 * change   - signed delta that was applied to the count (0 logs no
 *            AddRef/Release line)
 *
 * On the first call the GALLIUM_REFCNT_LOG option is read; if it names a file
 * that can be opened, logging is switched on, otherwise it is switched off
 * for good (debug_refcnt_state < 0).
 */
void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
{
   struct debug_stack_frame frames[STACK_LEN];
   const char* symbols[STACK_LEN];
   char desc[1024];
   unsigned refcnt;
   unsigned serial;
   unsigned initial;
   unsigned n;
   boolean existing;

   if (debug_refcnt_state < 0)
      return;

   if (debug_refcnt_state == 0)
   {
      /* lazy one-time setup of the log stream */
      const char* path = debug_get_option("GALLIUM_REFCNT_LOG", NULL);

      if (path && path[0])
         stream = os_file_stream_create(path);

      debug_refcnt_state = stream ? 1 : -1;
   }

   if (debug_refcnt_state <= 0)
      return;

   refcnt = p->count;
   existing = debug_serial((void*)p, &serial);

   /* start at frame 1 so that this function itself is not part of the dump */
   debug_backtrace_capture(frames, 1, STACK_LEN);
   for (n = 0; n < STACK_LEN; ++n)
      symbols[n] = frames[n].function ? debug_symbol_name_cached(frames[n].function) : 0;

   get_desc(desc, p);

   if (!existing)
   {
      os_stream_printf(stream, "<%s> %p %u Create\n", desc, p, serial);
      dump_stack(symbols);

      /* this is there to provide a gradual change even if we don't see the initialization */
      initial = refcnt - change;
      for (n = 1; n <= initial; ++n)
      {
         os_stream_printf(stream, "<%s> %p %u AddRef %u\n", desc, p, serial, n);
         dump_stack(symbols);
      }
   }

   if (change != 0)
   {
      os_stream_printf(stream, "<%s> %p %u %s %u\n", desc, p, serial, change > 0 ? "AddRef" : "Release", refcnt);
      dump_stack(symbols);
   }

   if (refcnt == 0)
   {
      /* forget the serial so a new object at the same address starts afresh */
      debug_serial_delete((void*)p);
      os_stream_printf(stream, "<%s> %p %u Destroy\n", desc, p, serial);
      dump_stack(symbols);
   }

   os_stream_flush(stream);
}
#endif
@@ -0,0 +1,63 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef U_DEBUG_REFCNT_H_
#define U_DEBUG_REFCNT_H_
#include <pipe/p_config.h>
#include <pipe/p_state.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef void (*debug_reference_descriptor)(char*, const struct pipe_reference*);
#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
extern int debug_refcnt_state;
void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
/* Fast-path hook for reference-count logging: forwards to
 * debug_reference_slowpath() unless logging has been switched off
 * (debug_refcnt_state < 0).
 */
static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
{
   if (debug_refcnt_state < 0)
      return;

   debug_reference_slowpath(p, get_desc, change);
}
#else
/* No-op variant used when reference-count logging is compiled out. */
static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
{
   (void)p;
   (void)get_desc;
   (void)change;
}
#endif
#ifdef __cplusplus
}
#endif
#endif /* U_DEBUG_REFCNT_H_ */
+88 -15
View File
@@ -33,9 +33,12 @@
*/
#include "pipe/p_compiler.h"
#include "os/os_thread.h"
#include "u_string.h"
#include "u_debug.h"
#include "u_debug_symbol.h"
#include "u_hash_table.h"
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
@@ -113,8 +116,8 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem
}
static INLINE boolean
debug_symbol_print_imagehlp(const void *addr)
static INLINE void
debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
{
HANDLE hProcess;
BYTE symbolBuffer[1024];
@@ -131,25 +134,95 @@ debug_symbol_print_imagehlp(const void *addr)
if(j_SymInitialize(hProcess, NULL, TRUE))
bSymInitialized = TRUE;
}
if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
return FALSE;
debug_printf("\t%s\n", pSymbol->Name);
return TRUE;
buf[0] = 0;
else
{
strncpy(buf, pSymbol->Name, size);
buf[size - 1] = 0;
}
}
#endif
#ifdef __GLIBC__
#include <execinfo.h>
/* This can only provide dynamic symbols, or binary offsets into a file.
*
* To fix this, post-process the output with tools/addr2line.sh
*/
/* Resolve addr to a symbol string with glibc's backtrace_symbols() and copy
 * it, truncated to size bytes including the terminator, into buf.
 *
 * buf is left empty when backtrace_symbols() fails, which makes
 * debug_symbol_name() fall back to printing the raw address.
 */
static INLINE void
debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
{
   char** syms = backtrace_symbols((void**)&addr, 1);

   /* backtrace_symbols() allocates its result and returns NULL on failure */
   if(!syms)
   {
      buf[0] = 0;
      return;
   }

   strncpy(buf, syms[0], size);
   buf[size - 1] = 0;
   free(syms);
}
#endif
/* Write a human-readable name for code address addr into buf (size bytes).
 *
 * Each platform resolver that is compiled in is tried in turn; the first one
 * that produces a non-empty string wins.  If none does, the address itself is
 * formatted with "%p".
 */
void
debug_symbol_name(const void *addr, char* buf, unsigned size)
{
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
   debug_symbol_name_imagehlp(addr, buf, size);
   if (buf[0] != '\0')
      return;
#endif

#ifdef __GLIBC__
   debug_symbol_name_glibc(addr, buf, size);
   if (buf[0] != '\0')
      return;
#endif

   /* last resort: the numeric address */
   util_snprintf(buf, size, "%p", addr);
   buf[size - 1] = '\0';
}
/* Print the symbol name for addr (or the raw address when it cannot be
 * resolved) on its own tab-indented line via debug_printf().
 *
 * All resolution goes through debug_symbol_name(), so every address is
 * printed exactly once.
 */
void
debug_symbol_print(const void *addr)
{
   char buf[1024];

   debug_symbol_name(addr, buf, sizeof(buf));
   debug_printf("\t%s\n", buf);
}
/* Cache used by debug_symbol_name_cached(): maps a code address to its
 * heap-allocated symbol string.  Private to this file.
 */
static struct util_hash_table* symbols_hash;
/* Guards symbols_hash. */
static pipe_mutex symbols_mutex;
/* Hash callback for the symbol cache: the address truncated to unsigned. */
static unsigned hash_ptr(void* p)
{
   uintptr_t value = (uintptr_t)p;

   return (unsigned)value;
}
/* Key comparison callback for the symbol cache: orders keys by address and
 * returns -1, 0 or 1.
 */
static int compare_ptr(void* a, void* b)
{
   if (a == b)
      return 0;

   return (a < b) ? -1 : 1;
}
/* Return the symbol name for addr, resolving it at most once per address.
 *
 * Resolved names are duplicated with strdup() and kept in symbols_hash; the
 * returned string belongs to the cache.  The cache is created on first use
 * and accessed under symbols_mutex.
 */
const char*
debug_symbol_name_cached(const void *addr)
{
   const char* cached;

   pipe_mutex_lock(symbols_mutex);

   if (symbols_hash == NULL)
      symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);

   cached = util_hash_table_get(symbols_hash, (void*)addr);
   if (cached == NULL)
   {
      char text[1024];

      debug_symbol_name(addr, text, sizeof(text));
      cached = strdup(text);
      util_hash_table_set(symbols_hash, (void*)addr, (void*)cached);
   }

   pipe_mutex_unlock(symbols_mutex);

   return cached;
}
+6 -1
View File
@@ -43,8 +43,13 @@ extern "C" {
void
debug_symbol_print(const void *addr);
debug_symbol_name(const void *addr, char* buf, unsigned size);
const char*
debug_symbol_name_cached(const void *addr);
void
debug_symbol_print(const void *addr);
#ifdef __cplusplus
}
@@ -1,9 +1,39 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef U_DIRTY_SURFACES_H_
#define U_DIRTY_SURFACES_H_
#include "pipe/p_state.h"
#include "util/u_double_list.h"
#include "util/u_math.h"
struct pipe_context;
typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
struct util_dirty_surfaces
+1
View File
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
static INLINE void
+3
View File
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
/* Shrink the array by sizeof(type) bytes and yield a pointer to the element
 * that was just removed (it sits at the new end of the data). */
#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
/* Same as util_dynarray_pop_ptr, but yields the removed element itself. */
#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
/* True when the array holds at least sizeof(type) bytes. */
#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
/* Pointer to element idx, viewing the data as an array of `type`. */
#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
/* Start of the stored data. */
#define util_dynarray_begin(buf) ((buf)->data)
/* One past the last stored byte, i.e. data + size bytes, as a void pointer. */
#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
#endif /* U_DYNARRAY_H */
@@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx,
make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
+19 -6
View File
@@ -33,6 +33,8 @@
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
#include "util/u_debug.h"
#include "util/u_debug_describe.h"
#include "util/u_debug_refcnt.h"
#include "util/u_atomic.h"
#include "util/u_box.h"
#include "util/u_math.h"
@@ -67,7 +69,9 @@ pipe_is_referenced(struct pipe_reference *reference)
* \return TRUE if the object's refcount hits zero and should be destroyed.
*/
static INLINE boolean
pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
pipe_reference_described(struct pipe_reference *ptr,
struct pipe_reference *reference,
debug_reference_descriptor get_desc)
{
boolean destroy = FALSE;
@@ -76,6 +80,7 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (reference) {
assert(pipe_is_referenced(reference));
p_atomic_inc(&reference->count);
debug_reference(reference, get_desc, 1);
}
if (ptr) {
@@ -83,41 +88,49 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (p_atomic_dec_zero(&ptr->count)) {
destroy = TRUE;
}
debug_reference(ptr, get_desc, -1);
}
}
return destroy;
}
/**
 * Update a generic reference using the default describer
 * (debug_describe_reference) for debug logging.
 *
 * \return whatever pipe_reference_described() returns for the same arguments.
 */
static INLINE boolean
pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
{
   debug_reference_descriptor describe =
      (debug_reference_descriptor)debug_describe_reference;

   return pipe_reference_described(ptr, reference, describe);
}
/**
 * Make *ptr refer to surf.
 *
 * The reference counts are updated exactly once, through
 * pipe_reference_described() with the surface describer; when that call
 * reports that the previous surface must be destroyed, it is destroyed
 * through its texture's screen.
 */
static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
   struct pipe_surface *old_surf = *ptr;

   if (pipe_reference_described(&(*ptr)->reference, &surf->reference,
                                (debug_reference_descriptor)debug_describe_surface))
      old_surf->texture->screen->tex_surface_destroy(old_surf);
   *ptr = surf;
}
/**
 * Make *ptr refer to tex.
 *
 * The reference counts are updated exactly once, through
 * pipe_reference_described() with the resource describer; when that call
 * reports that the previous resource must be destroyed, it is destroyed
 * through its screen.
 */
static INLINE void
pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
   struct pipe_resource *old_tex = *ptr;

   if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
                                (debug_reference_descriptor)debug_describe_resource))
      old_tex->screen->resource_destroy(old_tex->screen, old_tex);
   *ptr = tex;
}
/**
 * Make *ptr refer to view.
 *
 * The reference counts are updated exactly once, through
 * pipe_reference_described() with the sampler-view describer; when that call
 * reports that the previous view must be destroyed, it is destroyed through
 * the context that owns it.
 */
static INLINE void
pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
{
   struct pipe_sampler_view *old_view = *ptr;

   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
                                (debug_reference_descriptor)debug_describe_sampler_view))
      old_view->context->sampler_view_destroy(old_view->context, old_view);
   *ptr = view;
}
+149
View File
@@ -0,0 +1,149 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_linkage.h"
/* we must only record the registers that are actually used, not just declared */
/* Set bit `value` in the set and report whether it was already set.
 *
 * The mask is built as an unsigned long: the words of the set are unsigned
 * long, so the bit position can be as large as the width of long minus one,
 * which an int-typed `1 << n` cannot represent.
 */
static INLINE boolean
util_semantic_set_test_and_set(struct util_semantic_set *set, unsigned value)
{
   const unsigned bits_per_word = sizeof(unsigned long) * 8;
   unsigned long mask = 1ul << (value % bits_per_word);
   unsigned long *p = &set->masks[value / bits_per_word];
   unsigned long v = *p & mask;
   *p |= mask;
   return !!v;
}
unsigned
util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file)
{
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
unsigned count = 0;
ubyte *semantic_name;
ubyte *semantic_index;
tgsi_scan_shader(tokens, &info);
if(file == TGSI_FILE_INPUT)
{
semantic_name = info.input_semantic_name;
semantic_index = info.input_semantic_index;
}
else if(file == TGSI_FILE_OUTPUT)
{
semantic_name = info.output_semantic_name;
semantic_index = info.output_semantic_index;
}
else
{
assert(0);
semantic_name = NULL;
semantic_index = NULL;
}
tgsi_parse_init(&parse, tokens);
memset(set->masks, 0, sizeof(set->masks));
while(!tgsi_parse_end_of_tokens(&parse))
{
tgsi_parse_token(&parse);
if(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION)
{
const struct tgsi_full_instruction *finst = &parse.FullToken.FullInstruction;
unsigned i;
for(i = 0; i < finst->Instruction.NumDstRegs; ++i)
{
if(finst->Dst[i].Register.File == file)
{
unsigned idx = finst->Dst[i].Register.Index;
if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
{
if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
++count;
}
}
}
for(i = 0; i < finst->Instruction.NumSrcRegs; ++i)
{
if(finst->Src[i].Register.File == file)
{
unsigned idx = finst->Src[i].Register.Index;
if(semantic_name[idx] == TGSI_SEMANTIC_GENERIC)
{
if(!util_semantic_set_test_and_set(set, semantic_index[idx]))
++count;
}
}
}
}
}
tgsi_parse_free(&parse);
return count;
}
/* Run the following statement once for every i in 0..255 whose bit is set in
 * `set`.  The bit mask is an unsigned long so that bit positions up to the
 * full width of a mask word are valid.
 */
#define UTIL_SEMANTIC_SET_FOR_EACH(i, set) for(i = 0; i < 256; ++i) if((set)->masks[(i) / (sizeof(long) * 8)] & (1ul << ((i) % (sizeof(long) * 8))))

/* Build a slot -> semantic index layout from a semantic set.
 *
 * layout          - array of num_slots entries; unused slots are left at 0xff
 * set             - semantic indices to place
 * efficient_slots - number of slots that can be used with holes at no cost
 * num_slots       - size of the layout array
 *
 * Three placements are tried in order:
 *  - every index is below efficient_slots: slot i holds index i;
 *  - the span first..last is narrower than efficient_slots: slot (i - first)
 *    holds index i;
 *  - otherwise the indices are packed into consecutive slots from 0.
 */
void
util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots)
{
   int first = -1;
   int last = -1;
   unsigned i;

   memset(layout, 0xff, num_slots);

   UTIL_SEMANTIC_SET_FOR_EACH(i, set)
   {
      if(first < 0)
         first = i;
      last = i;
   }

   /* empty set: every slot stays unused */
   if(last < 0)
      return;

   if((unsigned)last < efficient_slots)
   {
      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
         layout[i] = i;
   }
   else if((unsigned)(last - first) < efficient_slots)
   {
      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
         layout[i - first] = i;
   }
   else
   {
      /* NOTE(review): nothing here bounds idx by num_slots; presumably
       * callers guarantee the set has at most num_slots members - confirm */
      unsigned idx = 0;
      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
         layout[idx++] = i;
   }
}
+66
View File
@@ -0,0 +1,66 @@
/**************************************************************************
*
* Copyright 2010 Luca Barbieri
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef U_LINKAGE_H_
#define U_LINKAGE_H_
#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
/* Bit set with one bit per value in 0..255, stored in unsigned long words. */
struct util_semantic_set
{
/* 256 bits in total: 256 / 8 bytes, divided into unsigned long words */
unsigned long masks[256 / 8 / sizeof(unsigned long)];
};
/* Return true when `value` is a member of the set.
 *
 * The word is selected with value / bits-per-word and the bit inside that
 * word with value % bits-per-word, matching how bits are stored by
 * util_semantic_set_test_and_set().
 */
static INLINE bool
util_semantic_set_contains(struct util_semantic_set *set, unsigned char value)
{
   const unsigned bits_per_word = sizeof(unsigned long) * 8;

   return !!(set->masks[value / bits_per_word] & (1ul << (value % bits_per_word)));
}
unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file);
/* efficient_slots is the number of slots such that hardware performance is
* the same whether that many slots are used with holes, or fewer slots are
* used with fewer holes.
*
* num_slots is the size of the layout array, i.e. the hardware limit.
*
* efficient_slots == 0 or efficient_slots == num_slots are typical settings.
*/
void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
/* Build the inverse of a layout: table[semantic index] = slot value.
 *
 * table            - indexed by semantic index; entries without a slot are
 *                    set to 0xff
 * layout           - slot -> semantic index array of num_slots entries
 * first_slot_value - value stored for slot 0; slot i stores
 *                    first_slot_value + i
 * num_slots        - number of entries in layout
 */
static INLINE void
util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
{
   int i;

   /* `table` is a pointer parameter, so sizeof(table) would only cover the
    * size of a pointer.  The table is indexed by an unsigned char taken from
    * layout[], which can be as large as 0xff, so clear all 256 entries.
    * NOTE(review): assumes every caller passes a 256-entry table - confirm */
   memset(table, 0xff, 256);

   for(i = 0; i < num_slots; ++i)
      table[layout[i]] = first_slot_value + i;
}
#endif /* U_LINKAGE_H_ */

Some files were not shown because too many files have changed in this diff Show More