diff --git a/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.c b/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.c new file mode 100644 index 0000000000..e276fd1390 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.c @@ -0,0 +1,322 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin + */ + +#include "util/u_debug.h" + +#include "util/u_memory.h" + +#include "cso_cache.h" +#include "cso_hash.h" + + +struct cso_cache { + struct cso_hash *hashes[CSO_CACHE_MAX]; + int max_size; + + cso_sanitize_callback sanitize_cb; + void *sanitize_data; +}; + +#if 1 +static unsigned hash_key(const void *key, unsigned key_size) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} +#else +static unsigned hash_key(const unsigned char *p, int n) +{ + unsigned h = 0; + unsigned g; + + while (n--) { + h = (h << 4) + *p++; + if ((g = (h & 0xf0000000)) != 0) + h ^= g >> 23; + h &= ~g; + } + return h; +} +#endif + +unsigned cso_construct_key(void *item, int item_size) +{ + return hash_key((item), item_size); +} + +static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) +{ + struct cso_hash *hash; + hash = sc->hashes[type]; + return hash; +} + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state) + 
cso->delete_state(cso->context, cso->data);
+   FREE(state);
+}
+
+static void delete_velements(void *state, void *data)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+}
+
+static INLINE void delete_cso(void *state, enum cso_cache_type type)
+{
+   switch (type) {
+   case CSO_BLEND:
+      delete_blend_state(state, 0);
+      break;
+   case CSO_SAMPLER:
+      delete_sampler_state(state, 0);
+      break;
+   case CSO_DEPTH_STENCIL_ALPHA:
+      delete_depth_stencil_state(state, 0);
+      break;
+   case CSO_RASTERIZER:
+      delete_rasterizer_state(state, 0);
+      break;
+   case CSO_VELEMENTS:
+      delete_velements(state, 0);
+      break;
+   default:
+      assert(0);
+      FREE(state);
+   }
+}
+
+
+static INLINE void sanitize_hash(struct cso_cache *sc,
+                                 struct cso_hash *hash,
+                                 enum cso_cache_type type,
+                                 int max_size)
+{
+   if (sc->sanitize_cb)
+      sc->sanitize_cb(hash, type, max_size, sc->sanitize_data);
+}
+
+
+static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
+                               int max_size, void *user_data)
+{
+   /* if we're approaching the maximum size, remove a fourth of the entries;
+    * otherwise every subsequent call would go through this same path */
+   int hash_size = cso_hash_size(hash);
+   int max_entries = (max_size > hash_size) ? max_size : hash_size;
+   int to_remove = (max_size < max_entries) * max_entries/4;
+   if (hash_size > max_size)
+      to_remove += hash_size - max_size;
+   while (to_remove) {
+      /* remove elements until we're back under the limit */
+      /* FIXME: currently we pick the nodes to remove at random */
+      struct cso_hash_iter iter = cso_hash_first_node(hash);
+      void *cso = cso_hash_take(hash, cso_hash_iter_key(iter));
+      delete_cso(cso, type);
+      --to_remove;
+   }
+}
+
+struct cso_hash_iter
+cso_insert_state(struct cso_cache *sc,
+                 unsigned hash_key, enum cso_cache_type type,
+                 void *state)
+{
+   struct cso_hash *hash = _cso_hash_for_type(sc, type);
+   sanitize_hash(sc, hash, type, sc->max_size);
+
+   return cso_hash_insert(hash, hash_key, state);
+}
+
+struct cso_hash_iter
+cso_find_state(struct cso_cache *sc,
+               unsigned hash_key, enum cso_cache_type type)
+{
+   struct cso_hash *hash = _cso_hash_for_type(sc, type);
+
+   return cso_hash_find(hash, hash_key);
+}
+
+
+void *cso_hash_find_data_from_template( struct cso_hash *hash,
+                                        unsigned hash_key,
+                                        void *templ,
+                                        int size )
+{
+   struct cso_hash_iter iter = cso_hash_find(hash, hash_key);
+   while (!cso_hash_iter_is_null(iter)) {
+      void *iter_data = cso_hash_iter_data(iter);
+      if (!memcmp(iter_data, templ, size)) {
+         /* We found a match
+          */
+         return iter_data;
+      }
+      iter = cso_hash_iter_next(iter);
+   }
+   return NULL;
+}
+
+
+struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
+                                             unsigned hash_key, enum cso_cache_type type,
+                                             void *templ, unsigned size)
+{
+   struct cso_hash_iter iter = cso_find_state(sc, hash_key, type);
+   while (!cso_hash_iter_is_null(iter)) {
+      void *iter_data = cso_hash_iter_data(iter);
+      if (!memcmp(iter_data, templ, size))
+         return iter;
+      iter = cso_hash_iter_next(iter);
+   }
+   return iter;
+}
+
+void * cso_take_state(struct cso_cache *sc,
+                      unsigned hash_key, enum cso_cache_type type)
+{
+   struct cso_hash *hash = _cso_hash_for_type(sc, type);
+   return cso_hash_take(hash, hash_key);
+}
+
+struct cso_cache *cso_cache_create(void)
+{
+   struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
+   int i;
+   if (sc == NULL)
+      return NULL;
+
+   sc->max_size = 4096;
+   for (i = 0; i < CSO_CACHE_MAX; i++)
+      sc->hashes[i] = cso_hash_create();
+
+   sc->sanitize_cb = sanitize_cb;
sc->sanitize_data = 0; + + return sc; +} + +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + struct cso_hash_iter iter; + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + func(state, user_data); + } + } +} + +void cso_cache_delete(struct cso_cache *sc) +{ + int i; + assert(sc); + + if (!sc) + return; + + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0); + + for (i = 0; i < CSO_CACHE_MAX; i++) + cso_hash_delete(sc->hashes[i]); + + FREE(sc); +} + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + int i; + + sc->max_size = number; + + for (i = 0; i < CSO_CACHE_MAX; i++) + sanitize_hash(sc, sc->hashes[i], i, sc->max_size); +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data) +{ + sc->sanitize_cb = cb; + sc->sanitize_data = user_data; +} + diff --git a/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.h b/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.h new file mode 100644 index 0000000000..cc1f1c0e12 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. 
When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. + * + * Even on hardware that doesn't do any kind of state cache, it makes the + * driver look a lot neater, plus it avoids all the redundant state + * translations on every frame. + * + * Currently our constant state objects are: + * - alpha test + * - blend + * - depth stencil + * - fragment shader + * - rasterizer (old setup) + * - sampler + * - vertex shader + * - vertex elements + * + * Things that are not constant state objects include: + * - blend_color + * - clip_state + * - clear_color_state + * - constant_buffer + * - feedback_state + * - framebuffer_state + * - polygon_stipple + * - scissor_state + * - texture_state + * - viewport_state + * + * @author Zack Rusin + */ + +#ifndef CSO_CACHE_H +#define CSO_CACHE_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures + * returned by value to be fully defined */ +#include "cso_hash.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +enum cso_cache_type { + CSO_RASTERIZER, + CSO_BLEND, + CSO_DEPTH_STENCIL_ALPHA, + CSO_SAMPLER, + CSO_VELEMENTS, + CSO_CACHE_MAX, +}; + +typedef void (*cso_state_callback)(void *ctx, void *obj); + +typedef void (*cso_sanitize_callback)(struct cso_hash *hash, + enum cso_cache_type type, + int max_size, + void *user_data); + +struct cso_cache; + +struct cso_blend { + struct pipe_blend_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_depth_stencil_alpha { + struct pipe_depth_stencil_alpha_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_rasterizer { + struct pipe_rasterizer_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_sampler { + struct pipe_sampler_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +struct cso_velems_state { + unsigned count; + struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS]; +}; + +struct cso_velements { + struct cso_velems_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + +unsigned cso_construct_key(void *item, int item_size); + +struct cso_cache *cso_cache_create(void); +void cso_cache_delete(struct cso_cache *sc); + +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data); + +struct cso_hash_iter cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state); +struct cso_hash_iter cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type); +struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ, unsigned size); +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data); +void * cso_take_state(struct cso_cache *sc, unsigned hash_key, + enum cso_cache_type type); + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number); +int cso_maximum_cache_size(const struct cso_cache *sc); + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/drivers/video/Gallium/auxiliary/cso_cache/cso_context.c b/drivers/video/Gallium/auxiliary/cso_cache/cso_context.c new file mode 100644 index 0000000000..6805427b81 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_context.c @@ -0,0 +1,1431 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /** + * @file + * + * Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. + * + * @author Zack Rusin + * @author Keith Whitwell + */ + +#include "pipe/p_state.h" +#include "util/u_draw.h" +#include "util/u_framebuffer.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_vbuf.h" +#include "tgsi/tgsi_parse.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" +#include "cso_context.h" + + +/** + * Info related to samplers and sampler views. + * We have one of these for fragment samplers and another for vertex samplers. + */ +struct sampler_info +{ + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers_saved; + + struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS]; + unsigned nr_views; + + struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_views_saved; +}; + + + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + struct u_vbuf *vbuf; + + boolean has_geometry_shader; + boolean has_streamout; + + struct sampler_info samplers[PIPE_SHADER_TYPES]; + + struct pipe_vertex_buffer aux_vertex_buffer_current; + struct pipe_vertex_buffer aux_vertex_buffer_saved; + unsigned aux_vertex_buffer_index; + + struct pipe_constant_buffer aux_constbuf_current[PIPE_SHADER_TYPES]; + struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES]; + + unsigned nr_so_targets; + struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS]; + + unsigned nr_so_targets_saved; + struct pipe_stream_output_target *so_targets_saved[PIPE_MAX_SO_BUFFERS]; + + /** Current and saved state. + * The saved state is used as a 1-deep stack. 
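+    *
+    * A sketch of the intended use, e.g. for blend state (temp_blend is
+    * a hypothetical caller-side template):
+    *
+    *    cso_save_blend(ctx);
+    *    cso_set_blend(ctx, &temp_blend);
+    *    ... draw a meta op ...
+    *    cso_restore_blend(ctx);
+    *
+    * Saving twice without an intervening restore triggers an assert.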
+    */
+   void *blend, *blend_saved;
+   void *depth_stencil, *depth_stencil_saved;
+   void *rasterizer, *rasterizer_saved;
+   void *fragment_shader, *fragment_shader_saved;
+   void *vertex_shader, *vertex_shader_saved;
+   void *geometry_shader, *geometry_shader_saved;
+   void *velements, *velements_saved;
+   struct pipe_query *render_condition, *render_condition_saved;
+   uint render_condition_mode, render_condition_mode_saved;
+   boolean render_condition_cond, render_condition_cond_saved;
+
+   struct pipe_clip_state clip;
+   struct pipe_clip_state clip_saved;
+
+   struct pipe_framebuffer_state fb, fb_saved;
+   struct pipe_viewport_state vp, vp_saved;
+   struct pipe_blend_color blend_color;
+   unsigned sample_mask, sample_mask_saved;
+   struct pipe_stencil_ref stencil_ref, stencil_ref_saved;
+};
+
+
+static boolean delete_blend_state(struct cso_context *ctx, void *state)
+{
+   struct cso_blend *cso = (struct cso_blend *)state;
+
+   if (ctx->blend == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
+static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state)
+{
+   struct cso_depth_stencil_alpha *cso =
+      (struct cso_depth_stencil_alpha *)state;
+
+   if (ctx->depth_stencil == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+
+   return TRUE;
+}
+
+static boolean delete_sampler_state(struct cso_context *ctx, void *state)
+{
+   struct cso_sampler *cso = (struct cso_sampler *)state;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
+static boolean delete_rasterizer_state(struct cso_context *ctx, void *state)
+{
+   struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
+
+   if (ctx->rasterizer == cso->data)
+      return FALSE;
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
+static boolean delete_vertex_elements(struct cso_context *ctx,
+                                      void *state)
+{
+   struct cso_velements *cso = (struct cso_velements *)state;
+
+   if (ctx->velements == cso->data)
+      return FALSE;
+
+   if (cso->delete_state)
+      cso->delete_state(cso->context, cso->data);
+   FREE(state);
+   return TRUE;
+}
+
+
+static INLINE boolean delete_cso(struct cso_context *ctx,
+                                 void *state, enum cso_cache_type type)
+{
+   switch (type) {
+   case CSO_BLEND:
+      return delete_blend_state(ctx, state);
+   case CSO_SAMPLER:
+      return delete_sampler_state(ctx, state);
+   case CSO_DEPTH_STENCIL_ALPHA:
+      return delete_depth_stencil_state(ctx, state);
+   case CSO_RASTERIZER:
+      return delete_rasterizer_state(ctx, state);
+   case CSO_VELEMENTS:
+      return delete_vertex_elements(ctx, state);
+   default:
+      assert(0);
+      FREE(state);
+   }
+   return FALSE;
+}
+
+static INLINE void
+sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
+              int max_size, void *user_data)
+{
+   struct cso_context *ctx = (struct cso_context *)user_data;
+   /* if we're approaching the maximum size, remove a fourth of the entries;
+    * otherwise every subsequent call would go through this same path */
+   int hash_size = cso_hash_size(hash);
+   int max_entries = (max_size > hash_size) ?
max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + struct cso_hash_iter iter = cso_hash_first_node(hash); + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + void *cso = cso_hash_iter_data(iter); + if (delete_cso(ctx, cso, type)) { + iter = cso_hash_erase(hash, iter); + --to_remove; + } else + iter = cso_hash_iter_next(iter); + } +} + +static void cso_init_vbuf(struct cso_context *cso) +{ + struct u_vbuf_caps caps; + + u_vbuf_get_caps(cso->pipe->screen, &caps); + + /* Install u_vbuf if there is anything unsupported. */ + if (!caps.buffer_offset_unaligned || + !caps.buffer_stride_unaligned || + !caps.velem_src_offset_unaligned || + !caps.format_fixed32 || + !caps.format_float16 || + !caps.format_float64 || + !caps.format_norm32 || + !caps.format_scaled32 || + !caps.user_vertex_buffers) { + cso->vbuf = u_vbuf_create(cso->pipe, &caps, + cso->aux_vertex_buffer_index); + } +} + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + cso_cache_set_sanitize_callback(ctx->cache, + sanitize_hash, + ctx); + + ctx->pipe = pipe; + ctx->sample_mask_saved = ~0; + + ctx->aux_vertex_buffer_index = 0; /* 0 for now */ + + cso_init_vbuf(ctx); + + /* Enable for testing: */ + if (0) cso_set_maximum_cache_size( ctx->cache, 4 ); + + if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_GEOMETRY, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { + ctx->has_geometry_shader = TRUE; + } + if (pipe->screen->get_param(pipe->screen, + PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) { + ctx->has_streamout = TRUE; + } + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + +/** + * Prior to context destruction, this function unbinds all state objects. 
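+ * Per the note on cso_destroy_context() below, this is expected to be
+ * called before cso_destroy_context().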
+ */
+void cso_release_all( struct cso_context *ctx )
+{
+   unsigned i, shader;
+
+   if (ctx->pipe) {
+      ctx->pipe->bind_blend_state( ctx->pipe, NULL );
+      ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
+      ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL );
+      if (ctx->pipe->bind_vertex_sampler_states)
+         ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL);
+      ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
+      ctx->pipe->bind_fs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vs_state( ctx->pipe, NULL );
+      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
+      ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL);
+      if (ctx->pipe->set_vertex_sampler_views)
+         ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
+      if (ctx->pipe->set_stream_output_targets)
+         ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, 0);
+   }
+
+   /* free fragment samplers, views */
+   for (shader = 0; shader < Elements(ctx->samplers); shader++) {
+      struct sampler_info *info = &ctx->samplers[shader];
+      for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+         pipe_sampler_view_reference(&info->views[i], NULL);
+         pipe_sampler_view_reference(&info->views_saved[i], NULL);
+      }
+   }
+
+   util_unreference_framebuffer_state(&ctx->fb);
+   util_unreference_framebuffer_state(&ctx->fb_saved);
+
+   pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer, NULL);
+   pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL);
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+      pipe_resource_reference(&ctx->aux_constbuf_current[i].buffer, NULL);
+      pipe_resource_reference(&ctx->aux_constbuf_saved[i].buffer, NULL);
+   }
+
+   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+      pipe_so_target_reference(&ctx->so_targets[i], NULL);
+      pipe_so_target_reference(&ctx->so_targets_saved[i], NULL);
+   }
+
+   if (ctx->cache) {
+      cso_cache_delete( ctx->cache );
+      ctx->cache = NULL;
+   }
+}
+
+
+/**
+ * Free the CSO context.  NOTE: the state tracker should have previously
+ * called cso_release_all().
+ */
+void cso_destroy_context( struct cso_context *ctx )
+{
+   if (ctx) {
+      if (ctx->vbuf)
+         u_vbuf_destroy(ctx->vbuf);
+      FREE( ctx );
+   }
+}
+
+
+/* These functions will either find the state of the given template
+ * in the cache, or create a new state from the given template,
+ * insert it in the cache and return it.
+ */
+
+/*
+ * If the driver returns 0 from the create method, the data member of
+ * the cso is set to the template itself.
+ */
+
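+/*
+ * In outline, each cso_set_xxx() below follows this pattern (a sketch,
+ * not literal code; "xxx" stands for blend, rasterizer, etc.):
+ *
+ *    hash_key = cso_construct_key((void*)templ, key_size);
+ *    iter = cso_find_state_template(ctx->cache, hash_key, CSO_XXX,
+ *                                   (void*)templ, key_size);
+ *    if (cso_hash_iter_is_null(iter)) {
+ *       cso = MALLOC(...);                               // cache miss:
+ *       cso->data = pipe->create_xxx_state(pipe, templ); // translate once
+ *       cso_insert_state(ctx->cache, hash_key, CSO_XXX, cso);
+ *    }
+ *    handle = cso->data;                                 // cached handle
+ *    if (ctx->xxx != handle)
+ *       pipe->bind_xxx_state(pipe, handle);              // bind on change
+ */
+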
+enum pipe_error cso_set_blend(struct cso_context *ctx,
+                              const struct pipe_blend_state *templ)
+{
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+
+   key_size = templ->independent_blend_enable ?
+                 sizeof(struct pipe_blend_state) :
+                 (char *)&(templ->rt[1]) - (char *)templ;
+   hash_key = cso_construct_key((void*)templ, key_size);
+   iter = cso_find_state_template(ctx->cache, hash_key, CSO_BLEND,
+                                  (void*)templ, key_size);
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      memset(&cso->state, 0, sizeof cso->state);
+      memcpy(&cso->state, templ, key_size);
+      cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data;
+   }
+
+   if (ctx->blend != handle) {
+      ctx->blend = handle;
+      ctx->pipe->bind_blend_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_blend(struct cso_context *ctx)
+{
+   assert(!ctx->blend_saved);
+   ctx->blend_saved = ctx->blend;
+}
+
+void cso_restore_blend(struct cso_context *ctx)
+{
+   if (ctx->blend != ctx->blend_saved) {
+      ctx->blend = ctx->blend_saved;
+      ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved);
+   }
+   ctx->blend_saved = NULL;
+}
+
+
+
+enum pipe_error
+cso_set_depth_stencil_alpha(struct cso_context *ctx,
+                            const struct pipe_depth_stencil_alpha_state *templ)
+{
+   unsigned key_size = sizeof(struct pipe_depth_stencil_alpha_state);
+   unsigned hash_key = cso_construct_key((void*)templ, key_size);
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key,
+                                                       CSO_DEPTH_STENCIL_ALPHA,
+                                                       (void*)templ, key_size);
+   void *handle;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_depth_stencil_alpha *cso =
+         MALLOC(sizeof(struct cso_depth_stencil_alpha));
+      if (!cso)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      memcpy(&cso->state, templ, sizeof(*templ));
+      cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe,
+                                                              &cso->state);
+      cso->delete_state =
+         (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key,
+                              CSO_DEPTH_STENCIL_ALPHA, cso);
+      if (cso_hash_iter_is_null(iter)) {
+         FREE(cso);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_depth_stencil_alpha *)
+                cso_hash_iter_data(iter))->data;
+   }
+
+   if (ctx->depth_stencil != handle) {
+      ctx->depth_stencil = handle;
+      ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_save_depth_stencil_alpha(struct cso_context *ctx)
+{
+   assert(!ctx->depth_stencil_saved);
+   ctx->depth_stencil_saved = ctx->depth_stencil;
+}
+
+void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
+{
+   if (ctx->depth_stencil != ctx->depth_stencil_saved) {
+      ctx->depth_stencil = ctx->depth_stencil_saved;
+      ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe,
+                                                ctx->depth_stencil_saved);
+   }
+   ctx->depth_stencil_saved = NULL;
+}
+
+
+
+enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
+                                   const struct pipe_rasterizer_state *templ)
+{
+   unsigned key_size = sizeof(struct pipe_rasterizer_state);
+   unsigned hash_key = cso_construct_key((void*)templ, key_size);
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key,
+                                                       CSO_RASTERIZER,
+                                                       (void*)templ, key_size);
+   void *handle = NULL;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct
cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = + (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_rasterizer(struct cso_context *ctx) +{ + assert(!ctx->rasterizer_saved); + ctx->rasterizer_saved = ctx->rasterizer; +} + +void cso_restore_rasterizer(struct cso_context *ctx) +{ + if (ctx->rasterizer != ctx->rasterizer_saved) { + ctx->rasterizer = ctx->rasterizer_saved; + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved); + } + ctx->rasterizer_saved = NULL; +} + + +void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ) +{ + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->fragment_shader) { + /* unbind before deleting */ + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + ctx->fragment_shader = NULL; + } + ctx->pipe->delete_fs_state(ctx->pipe, handle); +} + +void cso_save_fragment_shader(struct cso_context *ctx) +{ + assert(!ctx->fragment_shader_saved); + ctx->fragment_shader_saved = ctx->fragment_shader; +} + +void cso_restore_fragment_shader(struct cso_context *ctx) +{ + if (ctx->fragment_shader_saved != ctx->fragment_shader) { + ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); + ctx->fragment_shader = ctx->fragment_shader_saved; + } + ctx->fragment_shader_saved = NULL; +} + + +void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle) +{ + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } +} + +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->vertex_shader) { + /* unbind before deleting */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->vertex_shader = NULL; + } + ctx->pipe->delete_vs_state(ctx->pipe, handle); +} + +void cso_save_vertex_shader(struct cso_context *ctx) +{ + assert(!ctx->vertex_shader_saved); + ctx->vertex_shader_saved = ctx->vertex_shader; +} + +void cso_restore_vertex_shader(struct cso_context *ctx) +{ + if (ctx->vertex_shader_saved != ctx->vertex_shader) { + ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->vertex_shader = ctx->vertex_shader_saved; + } + ctx->vertex_shader_saved = NULL; +} + + +void cso_set_framebuffer(struct cso_context *ctx, + const struct pipe_framebuffer_state *fb) +{ + if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { + util_copy_framebuffer_state(&ctx->fb, fb); + ctx->pipe->set_framebuffer_state(ctx->pipe, fb); + } +} + +void cso_save_framebuffer(struct cso_context *ctx) +{ + util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); +} + +void cso_restore_framebuffer(struct cso_context *ctx) +{ + if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { + util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); + 
ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + util_unreference_framebuffer_state(&ctx->fb_saved); + } +} + + +void cso_set_viewport(struct cso_context *ctx, + const struct pipe_viewport_state *vp) +{ + if (memcmp(&ctx->vp, vp, sizeof(*vp))) { + ctx->vp = *vp; + ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, vp); + } +} + +void cso_save_viewport(struct cso_context *ctx) +{ + ctx->vp_saved = ctx->vp; +} + + +void cso_restore_viewport(struct cso_context *ctx) +{ + if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { + ctx->vp = ctx->vp_saved; + ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, &ctx->vp); + } +} + + +void cso_set_blend_color(struct cso_context *ctx, + const struct pipe_blend_color *bc) +{ + if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { + ctx->blend_color = *bc; + ctx->pipe->set_blend_color(ctx->pipe, bc); + } +} + +void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask) +{ + if (ctx->sample_mask != sample_mask) { + ctx->sample_mask = sample_mask; + ctx->pipe->set_sample_mask(ctx->pipe, sample_mask); + } +} + +void cso_save_sample_mask(struct cso_context *ctx) +{ + ctx->sample_mask_saved = ctx->sample_mask; +} + +void cso_restore_sample_mask(struct cso_context *ctx) +{ + cso_set_sample_mask(ctx, ctx->sample_mask_saved); +} + +void cso_set_stencil_ref(struct cso_context *ctx, + const struct pipe_stencil_ref *sr) +{ + if (memcmp(&ctx->stencil_ref, sr, sizeof(ctx->stencil_ref))) { + ctx->stencil_ref = *sr; + ctx->pipe->set_stencil_ref(ctx->pipe, sr); + } +} + +void cso_save_stencil_ref(struct cso_context *ctx) +{ + ctx->stencil_ref_saved = ctx->stencil_ref; +} + + +void cso_restore_stencil_ref(struct cso_context *ctx) +{ + if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved, + sizeof(ctx->stencil_ref))) { + ctx->stencil_ref = ctx->stencil_ref_saved; + ctx->pipe->set_stencil_ref(ctx->pipe, &ctx->stencil_ref); + } +} + +void cso_set_render_condition(struct cso_context *ctx, + struct pipe_query *query, + boolean condition, uint mode) +{ + struct pipe_context *pipe = ctx->pipe; + + if (ctx->render_condition != query || + ctx->render_condition_mode != mode || + ctx->render_condition_cond != condition) { + pipe->render_condition(pipe, query, condition, mode); + ctx->render_condition = query; + ctx->render_condition_cond = condition; + ctx->render_condition_mode = mode; + } +} + +void cso_save_render_condition(struct cso_context *ctx) +{ + ctx->render_condition_saved = ctx->render_condition; + ctx->render_condition_cond_saved = ctx->render_condition_cond; + ctx->render_condition_mode_saved = ctx->render_condition_mode; +} + +void cso_restore_render_condition(struct cso_context *ctx) +{ + cso_set_render_condition(ctx, ctx->render_condition_saved, + ctx->render_condition_cond_saved, + ctx->render_condition_mode_saved); +} + +void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle) +{ + assert(ctx->has_geometry_shader || !handle); + + if (ctx->has_geometry_shader && ctx->geometry_shader != handle) { + ctx->geometry_shader = handle; + ctx->pipe->bind_gs_state(ctx->pipe, handle); + } +} + +void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) +{ + if (handle == ctx->geometry_shader) { + /* unbind before deleting */ + ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->geometry_shader = NULL; + } + ctx->pipe->delete_gs_state(ctx->pipe, handle); +} + +void cso_save_geometry_shader(struct cso_context *ctx) +{ + if (!ctx->has_geometry_shader) { + return; + } + + assert(!ctx->geometry_shader_saved); + 
ctx->geometry_shader_saved = ctx->geometry_shader;
+}
+
+void cso_restore_geometry_shader(struct cso_context *ctx)
+{
+   if (!ctx->has_geometry_shader) {
+      return;
+   }
+
+   if (ctx->geometry_shader_saved != ctx->geometry_shader) {
+      ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved);
+      ctx->geometry_shader = ctx->geometry_shader_saved;
+   }
+   ctx->geometry_shader_saved = NULL;
+}
+
+/* clip state */
+
+static INLINE void
+clip_state_cpy(struct pipe_clip_state *dst,
+               const struct pipe_clip_state *src)
+{
+   memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
+}
+
+static INLINE int
+clip_state_cmp(const struct pipe_clip_state *a,
+               const struct pipe_clip_state *b)
+{
+   return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
+}
+
+void
+cso_set_clip(struct cso_context *ctx,
+             const struct pipe_clip_state *clip)
+{
+   if (clip_state_cmp(&ctx->clip, clip)) {
+      clip_state_cpy(&ctx->clip, clip);
+      ctx->pipe->set_clip_state(ctx->pipe, clip);
+   }
+}
+
+void
+cso_save_clip(struct cso_context *ctx)
+{
+   clip_state_cpy(&ctx->clip_saved, &ctx->clip);
+}
+
+void
+cso_restore_clip(struct cso_context *ctx)
+{
+   if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
+      clip_state_cpy(&ctx->clip, &ctx->clip_saved);
+      ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
+   }
+}
+
+enum pipe_error
+cso_set_vertex_elements(struct cso_context *ctx,
+                        unsigned count,
+                        const struct pipe_vertex_element *states)
+{
+   struct u_vbuf *vbuf = ctx->vbuf;
+   unsigned key_size, hash_key;
+   struct cso_hash_iter iter;
+   void *handle;
+   struct cso_velems_state velems_state;
+
+   if (vbuf) {
+      u_vbuf_set_vertex_elements(vbuf, count, states);
+      return PIPE_OK;
+   }
+
+   /* Need to include the count in the stored state data too.
+    * Otherwise the first 'count' pipe_vertex_elements could be identical
+    * even if the counts differ, and there's no guarantee the hash would
+    * differ in that case either.
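+    * (For example, two layouts whose first two elements match but whose
+    * counts are 2 and 3 should not end up with the same key.)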
+ */ + key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); + velems_state.count = count; + memcpy(velems_state.velems, states, + sizeof(struct pipe_vertex_element) * count); + hash_key = cso_construct_key((void*)&velems_state, key_size); + iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, + (void*)&velems_state, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_velements *cso = MALLOC(sizeof(struct cso_velements)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, &velems_state, key_size); + cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, + &cso->state.velems[0]); + cso->delete_state = + (cso_state_callback) ctx->pipe->delete_vertex_elements_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data; + } + + if (ctx->velements != handle) { + ctx->velements = handle; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_vertex_elements(struct cso_context *ctx) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf) { + u_vbuf_save_vertex_elements(vbuf); + return; + } + + assert(!ctx->velements_saved); + ctx->velements_saved = ctx->velements; +} + +void cso_restore_vertex_elements(struct cso_context *ctx) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf) { + u_vbuf_restore_vertex_elements(vbuf); + return; + } + + if (ctx->velements != ctx->velements_saved) { + ctx->velements = ctx->velements_saved; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); + } + ctx->velements_saved = NULL; +} + +/* vertex buffers */ + +void cso_set_vertex_buffers(struct cso_context *ctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf) { + u_vbuf_set_vertex_buffers(vbuf, start_slot, count, buffers); + return; + } + + /* Save what's in the auxiliary slot, so that we can save and restore it + * for meta ops. 
*/ + if (start_slot <= ctx->aux_vertex_buffer_index && + start_slot+count > ctx->aux_vertex_buffer_index) { + if (buffers) { + const struct pipe_vertex_buffer *vb = + buffers + (ctx->aux_vertex_buffer_index - start_slot); + + pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer, + vb->buffer); + memcpy(&ctx->aux_vertex_buffer_current, vb, + sizeof(struct pipe_vertex_buffer)); + } + else { + pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer, + NULL); + ctx->aux_vertex_buffer_current.user_buffer = NULL; + } + } + + ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers); +} + +void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf) { + u_vbuf_save_aux_vertex_buffer_slot(vbuf); + return; + } + + pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, + ctx->aux_vertex_buffer_current.buffer); + memcpy(&ctx->aux_vertex_buffer_saved, &ctx->aux_vertex_buffer_current, + sizeof(struct pipe_vertex_buffer)); +} + +void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx) +{ + struct u_vbuf *vbuf = ctx->vbuf; + + if (vbuf) { + u_vbuf_restore_aux_vertex_buffer_slot(vbuf); + return; + } + + cso_set_vertex_buffers(ctx, ctx->aux_vertex_buffer_index, 1, + &ctx->aux_vertex_buffer_saved); + pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL); +} + +unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx) +{ + return ctx->aux_vertex_buffer_index; +} + + +/**************** fragment/vertex sampler view state *************************/ + +static enum pipe_error +single_sampler(struct cso_context *ctx, + struct sampler_info *info, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); + struct cso_hash_iter iter = + cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void *) templ, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = + (cso_state_callback) ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + info->samplers[idx] = handle; + + return PIPE_OK; +} + +enum pipe_error +cso_single_sampler(struct cso_context *ctx, + unsigned shader_stage, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ); +} + + + +static void +single_sampler_done(struct cso_context *ctx, unsigned shader_stage) +{ + struct sampler_info *info = &ctx->samplers[shader_stage]; + unsigned i; + + /* find highest non-null sampler */ + for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { + if (info->samplers[i - 1] != NULL) + break; + } + + info->nr_samplers = i; + + if (info->hw.nr_samplers != info->nr_samplers || + memcmp(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)) != 0) + { + memcpy(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)); + info->hw.nr_samplers = info->nr_samplers; + + 
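      /* bind the updated sampler state objects for the given shader stage */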
+      switch (shader_stage) {
+      case PIPE_SHADER_FRAGMENT:
+         ctx->pipe->bind_fragment_sampler_states(ctx->pipe,
+                                                 info->nr_samplers,
+                                                 info->samplers);
+         break;
+      case PIPE_SHADER_VERTEX:
+         ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
+                                               info->nr_samplers,
+                                               info->samplers);
+         break;
+      case PIPE_SHADER_GEOMETRY:
+         ctx->pipe->bind_geometry_sampler_states(ctx->pipe,
+                                                 info->nr_samplers,
+                                                 info->samplers);
+         break;
+      default:
+         assert(!"bad shader type in single_sampler_done()");
+      }
+   }
+}
+
+void
+cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
+{
+   single_sampler_done(ctx, shader_stage);
+}
+
+
+/*
+ * If this function encounters any errors it will return the last one,
+ * so that we always try to set as many samplers as possible.
+ */
+enum pipe_error
+cso_set_samplers(struct cso_context *ctx,
+                 unsigned shader_stage,
+                 unsigned nr,
+                 const struct pipe_sampler_state **templates)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   unsigned i;
+   enum pipe_error temp, error = PIPE_OK;
+
+   /* TODO: fastpath
+    */
+
+   for (i = 0; i < nr; i++) {
+      temp = single_sampler(ctx, info, i, templates[i]);
+      if (temp != PIPE_OK)
+         error = temp;
+   }
+
+   for ( ; i < info->nr_samplers; i++) {
+      temp = single_sampler(ctx, info, i, NULL);
+      if (temp != PIPE_OK)
+         error = temp;
+   }
+
+   single_sampler_done(ctx, shader_stage);
+
+   return error;
+}
+
+void
+cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   info->nr_samplers_saved = info->nr_samplers;
+   memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
+}
+
+
+void
+cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   info->nr_samplers = info->nr_samplers_saved;
+   memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
+   single_sampler_done(ctx, shader_stage);
+}
+
+
+void
+cso_set_sampler_views(struct cso_context *ctx,
+                      unsigned shader_stage,
+                      unsigned count,
+                      struct pipe_sampler_view **views)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   unsigned i;
+
+   /* reference new views */
+   for (i = 0; i < count; i++) {
+      pipe_sampler_view_reference(&info->views[i], views[i]);
+   }
+   /* unref extra old views, if any */
+   for (; i < info->nr_views; i++) {
+      pipe_sampler_view_reference(&info->views[i], NULL);
+   }
+
+   info->nr_views = count;
+
+   /* bind the new sampler views */
+   switch (shader_stage) {
+   case PIPE_SHADER_FRAGMENT:
+      ctx->pipe->set_fragment_sampler_views(ctx->pipe, count, info->views);
+      break;
+   case PIPE_SHADER_VERTEX:
+      ctx->pipe->set_vertex_sampler_views(ctx->pipe, count, info->views);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      ctx->pipe->set_geometry_sampler_views(ctx->pipe, count, info->views);
+      break;
+   default:
+      assert(!"bad shader type in cso_set_sampler_views()");
+   }
+}
+
+
+void
+cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   unsigned i;
+
+   info->nr_views_saved = info->nr_views;
+
+   for (i = 0; i < info->nr_views; i++) {
+      assert(!info->views_saved[i]);
+      pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
+   }
+}
+
+
+void
+cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+{
+   struct sampler_info *info = &ctx->samplers[shader_stage];
+   unsigned i, nr_saved = info->nr_views_saved;
+
+   for (i = 0; i < nr_saved; i++) {
+      pipe_sampler_view_reference(&info->views[i], NULL);
+      /*
move the reference from one pointer to another */ + info->views[i] = info->views_saved[i]; + info->views_saved[i] = NULL; + } + for (; i < info->nr_views; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); + } + + /* bind the old/saved sampler views */ + switch (shader_stage) { + case PIPE_SHADER_FRAGMENT: + ctx->pipe->set_fragment_sampler_views(ctx->pipe, nr_saved, info->views); + break; + case PIPE_SHADER_VERTEX: + ctx->pipe->set_vertex_sampler_views(ctx->pipe, nr_saved, info->views); + break; + case PIPE_SHADER_GEOMETRY: + ctx->pipe->set_geometry_sampler_views(ctx->pipe, nr_saved, info->views); + break; + default: + assert(!"bad shader type in cso_restore_sampler_views()"); + } + + info->nr_views = nr_saved; + info->nr_views_saved = 0; +} + + +void +cso_set_stream_outputs(struct cso_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) +{ + struct pipe_context *pipe = ctx->pipe; + uint i; + + if (!ctx->has_streamout) { + assert(num_targets == 0); + return; + } + + if (ctx->nr_so_targets == 0 && num_targets == 0) { + /* Nothing to do. */ + return; + } + + /* reference new targets */ + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference(&ctx->so_targets[i], targets[i]); + } + /* unref extra old targets, if any */ + for (; i < ctx->nr_so_targets; i++) { + pipe_so_target_reference(&ctx->so_targets[i], NULL); + } + + pipe->set_stream_output_targets(pipe, num_targets, targets, + append_bitmask); + ctx->nr_so_targets = num_targets; +} + +void +cso_save_stream_outputs(struct cso_context *ctx) +{ + uint i; + + if (!ctx->has_streamout) { + return; + } + + ctx->nr_so_targets_saved = ctx->nr_so_targets; + + for (i = 0; i < ctx->nr_so_targets; i++) { + assert(!ctx->so_targets_saved[i]); + pipe_so_target_reference(&ctx->so_targets_saved[i], ctx->so_targets[i]); + } +} + +void +cso_restore_stream_outputs(struct cso_context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + uint i; + + if (!ctx->has_streamout) { + return; + } + + if (ctx->nr_so_targets == 0 && ctx->nr_so_targets_saved == 0) { + /* Nothing to do. 
*/ + return; + } + + for (i = 0; i < ctx->nr_so_targets_saved; i++) { + pipe_so_target_reference(&ctx->so_targets[i], NULL); + /* move the reference from one pointer to another */ + ctx->so_targets[i] = ctx->so_targets_saved[i]; + ctx->so_targets_saved[i] = NULL; + } + for (; i < ctx->nr_so_targets; i++) { + pipe_so_target_reference(&ctx->so_targets[i], NULL); + } + + /* ~0 means append */ + pipe->set_stream_output_targets(pipe, ctx->nr_so_targets_saved, + ctx->so_targets, ~0); + + ctx->nr_so_targets = ctx->nr_so_targets_saved; + ctx->nr_so_targets_saved = 0; +} + +/* constant buffers */ + +void +cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage, + unsigned index, struct pipe_constant_buffer *cb) +{ + struct pipe_context *pipe = cso->pipe; + + pipe->set_constant_buffer(pipe, shader_stage, index, cb); + + if (index == 0) { + util_copy_constant_buffer(&cso->aux_constbuf_current[shader_stage], cb); + } +} + +void +cso_set_constant_buffer_resource(struct cso_context *cso, + unsigned shader_stage, + unsigned index, + struct pipe_resource *buffer) +{ + if (buffer) { + struct pipe_constant_buffer cb; + cb.buffer = buffer; + cb.buffer_offset = 0; + cb.buffer_size = buffer->width0; + cb.user_buffer = NULL; + cso_set_constant_buffer(cso, shader_stage, index, &cb); + } else { + cso_set_constant_buffer(cso, shader_stage, index, NULL); + } +} + +void +cso_save_constant_buffer_slot0(struct cso_context *cso, + unsigned shader_stage) +{ + util_copy_constant_buffer(&cso->aux_constbuf_saved[shader_stage], + &cso->aux_constbuf_current[shader_stage]); +} + +void +cso_restore_constant_buffer_slot0(struct cso_context *cso, + unsigned shader_stage) +{ + cso_set_constant_buffer(cso, shader_stage, 0, + &cso->aux_constbuf_saved[shader_stage]); + pipe_resource_reference(&cso->aux_constbuf_saved[shader_stage].buffer, + NULL); +} + +/* drawing */ + +void +cso_set_index_buffer(struct cso_context *cso, + const struct pipe_index_buffer *ib) +{ + struct u_vbuf *vbuf = cso->vbuf; + + if (vbuf) { + u_vbuf_set_index_buffer(vbuf, ib); + } else { + struct pipe_context *pipe = cso->pipe; + pipe->set_index_buffer(pipe, ib); + } +} + +void +cso_draw_vbo(struct cso_context *cso, + const struct pipe_draw_info *info) +{ + struct u_vbuf *vbuf = cso->vbuf; + + if (vbuf) { + u_vbuf_draw_vbo(vbuf, info); + } else { + struct pipe_context *pipe = cso->pipe; + pipe->draw_vbo(pipe, info); + } +} + +void +cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count) +{ + struct pipe_draw_info info; + + util_draw_init_info(&info); + + info.mode = mode; + info.start = start; + info.count = count; + info.min_index = start; + info.max_index = start + count - 1; + + cso_draw_vbo(cso, &info); +} diff --git a/drivers/video/Gallium/auxiliary/cso_cache/cso_context.h b/drivers/video/Gallium/auxiliary/cso_cache/cso_context.h new file mode 100644 index 0000000000..82c8e18def --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_context.h @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CSO_CONTEXT_H
+#define CSO_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cso_context;
+struct u_vbuf;
+
+struct cso_context *cso_create_context( struct pipe_context *pipe );
+
+void cso_release_all( struct cso_context *ctx );
+
+void cso_destroy_context( struct cso_context *cso );
+
+
+
+enum pipe_error cso_set_blend( struct cso_context *cso,
+                               const struct pipe_blend_state *blend );
+void cso_save_blend(struct cso_context *cso);
+void cso_restore_blend(struct cso_context *cso);
+
+
+
+enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
+                                             const struct pipe_depth_stencil_alpha_state *dsa );
+void cso_save_depth_stencil_alpha(struct cso_context *cso);
+void cso_restore_depth_stencil_alpha(struct cso_context *cso);
+
+
+
+enum pipe_error cso_set_rasterizer( struct cso_context *cso,
+                                    const struct pipe_rasterizer_state *rasterizer );
+void cso_save_rasterizer(struct cso_context *cso);
+void cso_restore_rasterizer(struct cso_context *cso);
+
+
+enum pipe_error
+cso_set_samplers(struct cso_context *cso,
+                 unsigned shader_stage,
+                 unsigned count,
+                 const struct pipe_sampler_state **states);
+
+void
+cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
+
+void
+cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
+
+/* Alternate interface to support state trackers that like to modify
+ * samplers one at a time:
+ */
+enum pipe_error
+cso_single_sampler(struct cso_context *cso,
+                   unsigned shader_stage,
+                   unsigned idx,
+                   const struct pipe_sampler_state *templ);
+
+void
+cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
+
+
+enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
+                                        unsigned count,
+                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+
+
+void cso_set_vertex_buffers(struct cso_context *ctx,
+                            unsigned start_slot, unsigned count,
+                            const struct pipe_vertex_buffer *buffers);
+
+/* One vertex buffer slot is provided with the save/restore functionality.
+ * cso_context chooses the slot; it can be non-zero.
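+ *
+ * A sketch of typical meta-op usage (tmp_vb is a hypothetical
+ * caller-side vertex buffer):
+ *
+ *    cso_save_aux_vertex_buffer_slot(cso);
+ *    cso_set_vertex_buffers(cso, cso_get_aux_vertex_buffer_slot(cso),
+ *                           1, &tmp_vb);
+ *    ... draw ...
+ *    cso_restore_aux_vertex_buffer_slot(cso);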
+ */
+void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
+void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
+unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);
+
+
+void cso_set_stream_outputs(struct cso_context *ctx,
+                            unsigned num_targets,
+                            struct pipe_stream_output_target **targets,
+                            unsigned append_bitmask);
+void cso_save_stream_outputs(struct cso_context *ctx);
+void cso_restore_stream_outputs(struct cso_context *ctx);
+
+
+/*
+ * We don't provide shader caching in CSO.  Most of the time the API
+ * provides object semantics for shaders anyway, and in the cases where
+ * it doesn't (e.g. Mesa's internally generated texenv programs), it is
+ * up to the state tracker to implement its own specialized caching.
+ */
+
+void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
+void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
+void cso_save_fragment_shader(struct cso_context *cso);
+void cso_restore_fragment_shader(struct cso_context *cso);
+
+
+void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
+void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
+void cso_save_vertex_shader(struct cso_context *cso);
+void cso_restore_vertex_shader(struct cso_context *cso);
+
+
+void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
+void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
+void cso_save_geometry_shader(struct cso_context *cso);
+void cso_restore_geometry_shader(struct cso_context *cso);
+
+
+void cso_set_framebuffer(struct cso_context *cso,
+                         const struct pipe_framebuffer_state *fb);
+void cso_save_framebuffer(struct cso_context *cso);
+void cso_restore_framebuffer(struct cso_context *cso);
+
+
+void cso_set_viewport(struct cso_context *cso,
+                      const struct pipe_viewport_state *vp);
+void cso_save_viewport(struct cso_context *cso);
+void cso_restore_viewport(struct cso_context *cso);
+
+
+void cso_set_blend_color(struct cso_context *cso,
+                         const struct pipe_blend_color *bc);
+
+void cso_set_sample_mask(struct cso_context *cso, unsigned sample_mask);
+void cso_save_sample_mask(struct cso_context *ctx);
+void cso_restore_sample_mask(struct cso_context *ctx);
+
+void cso_set_stencil_ref(struct cso_context *cso,
+                         const struct pipe_stencil_ref *sr);
+void cso_save_stencil_ref(struct cso_context *cso);
+void cso_restore_stencil_ref(struct cso_context *cso);
+
+void cso_set_render_condition(struct cso_context *cso,
+                              struct pipe_query *query,
+                              boolean condition, uint mode);
+void cso_save_render_condition(struct cso_context *cso);
+void cso_restore_render_condition(struct cso_context *cso);
+
+
+/* clip state */
+
+void
+cso_set_clip(struct cso_context *cso,
+             const struct pipe_clip_state *clip);
+
+void
+cso_save_clip(struct cso_context *cso);
+
+void
+cso_restore_clip(struct cso_context *cso);
+
+
+/* sampler view state */
+
+void
+cso_set_sampler_views(struct cso_context *cso,
+                      unsigned shader_stage,
+                      unsigned count,
+                      struct pipe_sampler_view **views);
+
+void
+cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage);
+
+void
+cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage);
+
+
+/* constant buffers */
+
+void cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage,
+                             unsigned index, struct pipe_constant_buffer *cb);
+void cso_set_constant_buffer_resource(struct cso_context *cso,
+                                      unsigned shader_stage,
+                                      unsigned index,
+                                      struct pipe_resource *buffer);
+void
+void cso_save_constant_buffer_slot0(struct cso_context *cso, + unsigned shader_stage); +void cso_restore_constant_buffer_slot0(struct cso_context *cso, + unsigned shader_stage); + + +/* drawing */ + +void +cso_set_index_buffer(struct cso_context *cso, + const struct pipe_index_buffer *ib); + +void +cso_draw_vbo(struct cso_context *cso, + const struct pipe_draw_info *info); + +/* helper drawing function */ +void +cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.c b/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.c new file mode 100644 index 0000000000..288cef7b6f --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.c @@ -0,0 +1,439 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + */ + +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "cso_hash.h" + +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +static const int MinNumBits = 4; + +static const unsigned char prime_deltas[] = { + 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3, + 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0 +}; + +static int primeForNumBits(int numBits) +{ + return (1 << numBits) + prime_deltas[numBits]; +} + +/* + Returns the smallest integer n such that + primeForNumBits(n) >= hint.
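+
+   For example, countBits(1000) returns 10: primeForNumBits(9) is
+   512 + 9 = 521, which is still below 1000, while primeForNumBits(10)
+   is 1024 + 7 = 1031, which is not.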
+*/ +static int countBits(int hint) +{ + int numBits = 0; + int bits = hint; + + while (bits > 1) { + bits >>= 1; + numBits++; + } + + if (numBits >= (int)sizeof(prime_deltas)) { + numBits = sizeof(prime_deltas) - 1; + } else if (primeForNumBits(numBits) < hint) { + ++numBits; + } + return numBits; +} + +struct cso_node { + struct cso_node *next; + unsigned key; + void *value; +}; + +struct cso_hash_data { + struct cso_node *fakeNext; + struct cso_node **buckets; + int size; + int nodeSize; + short userNumBits; + short numBits; + int numBuckets; +}; + +struct cso_hash { + union { + struct cso_hash_data *d; + struct cso_node *e; + } data; +}; + +static void *cso_data_allocate_node(struct cso_hash_data *hash) +{ + return MALLOC(hash->nodeSize); +} + +static void cso_free_node(struct cso_node *node) +{ + FREE(node); +} + +static struct cso_node * +cso_hash_create_node(struct cso_hash *hash, + unsigned akey, void *avalue, + struct cso_node **anextNode) +{ + struct cso_node *node = cso_data_allocate_node(hash->data.d); + + if (!node) + return NULL; + + node->key = akey; + node->value = avalue; + + node->next = (struct cso_node*)(*anextNode); + *anextNode = node; + ++hash->data.d->size; + return node; +} + +static void cso_data_rehash(struct cso_hash_data *hash, int hint) +{ + if (hint < 0) { + hint = countBits(-hint); + if (hint < MinNumBits) + hint = MinNumBits; + hash->userNumBits = (short)hint; + while (primeForNumBits(hint) < (hash->size >> 1)) + ++hint; + } else if (hint < MinNumBits) { + hint = MinNumBits; + } + + if (hash->numBits != hint) { + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **oldBuckets = hash->buckets; + int oldNumBuckets = hash->numBuckets; + int i = 0; + + hash->numBits = (short)hint; + hash->numBuckets = primeForNumBits(hint); + hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets); + for (i = 0; i < hash->numBuckets; ++i) + hash->buckets[i] = e; + + for (i = 0; i < oldNumBuckets; ++i) { + struct cso_node *firstNode = oldBuckets[i]; + while (firstNode != e) { + unsigned h = firstNode->key; + struct cso_node *lastNode = firstNode; + struct cso_node *afterLastNode; + struct cso_node **beforeFirstNode; + + while (lastNode->next != e && lastNode->next->key == h) + lastNode = lastNode->next; + + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + while (*beforeFirstNode != e) + beforeFirstNode = &(*beforeFirstNode)->next; + lastNode->next = *beforeFirstNode; + *beforeFirstNode = firstNode; + firstNode = afterLastNode; + } + } + FREE(oldBuckets); + } +} + +static void cso_data_might_grow(struct cso_hash_data *hash) +{ + if (hash->size >= hash->numBuckets) + cso_data_rehash(hash, hash->numBits + 1); +} + +static void cso_data_has_shrunk(struct cso_hash_data *hash) +{ + if (hash->size <= (hash->numBuckets >> 3) && + hash->numBits > hash->userNumBits) { + int max = MAX(hash->numBits-2, hash->userNumBits); + cso_data_rehash(hash, max); + } +} + +static struct cso_node *cso_data_first_node(struct cso_hash_data *hash) +{ + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **bucket = hash->buckets; + int n = hash->numBuckets; + while (n--) { + if (*bucket != e) + return *bucket; + ++bucket; + } + return e; +} + +static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey) +{ + struct cso_node **node; + + if (hash->data.d->numBuckets) { + node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]); + assert(*node == hash->data.e || (*node)->next); + 
while (*node != hash->data.e && (*node)->key != akey) + node = &(*node)->next; + } else { + node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e)); + } + return node; +} + +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, + unsigned key, void *data) +{ + cso_data_might_grow(hash->data.d); + + { + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + if (!node) { + struct cso_hash_iter null_iter = {hash, 0}; + return null_iter; + } + + { + struct cso_hash_iter iter = {hash, node}; + return iter; + } + } +} + +struct cso_hash * cso_hash_create(void) +{ + struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + if (!hash) + return NULL; + + hash->data.d = MALLOC_STRUCT(cso_hash_data); + if (!hash->data.d) { + FREE(hash); + return NULL; + } + + hash->data.d->fakeNext = 0; + hash->data.d->buckets = 0; + hash->data.d->size = 0; + hash->data.d->nodeSize = sizeof(struct cso_node); + hash->data.d->userNumBits = (short)MinNumBits; + hash->data.d->numBits = 0; + hash->data.d->numBuckets = 0; + + return hash; +} + +void cso_hash_delete(struct cso_hash *hash) +{ + struct cso_node *e_for_x = (struct cso_node *)(hash->data.d); + struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets); + int n = hash->data.d->numBuckets; + while (n--) { + struct cso_node *cur = *bucket++; + while (cur != e_for_x) { + struct cso_node *next = cur->next; + cso_free_node(cur); + cur = next; + } + } + FREE(hash->data.d->buckets); + FREE(hash->data.d); + FREE(hash); +} + +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, + unsigned key) +{ + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_hash_iter iter = {hash, *nextNode}; + return iter; +} + +unsigned cso_hash_iter_key(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->key; +} + +void * cso_hash_iter_data(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->value; +} + +static struct cso_node *cso_hash_data_next(struct cso_node *node) +{ + union { + struct cso_node *next; + struct cso_node *e; + struct cso_hash_data *d; + } a; + int start; + struct cso_node **bucket; + int n; + + a.next = node->next; + if (!a.next) { + debug_printf("iterating beyond the last element\n"); + return 0; + } + if (a.next->next) + return a.next; + + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; + while (n--) { + if (*bucket != a.e) + return *bucket; + ++bucket; + } + return a.e; +} + + +static struct cso_node *cso_hash_data_prev(struct cso_node *node) +{ + union { + struct cso_node *e; + struct cso_hash_data *d; + } a; + int start; + struct cso_node *sentinel; + struct cso_node **bucket; + + a.e = node; + while (a.e->next) + a.e = a.e->next; + + if (node == a.e) + start = a.d->numBuckets - 1; + else + start = node->key % a.d->numBuckets; + + sentinel = node; + bucket = a.d->buckets + start; + while (start >= 0) { + if (*bucket != sentinel) { + struct cso_node *prev = *bucket; + while (prev->next != sentinel) + prev = prev->next; + return prev; + } + + sentinel = a.e; + --bucket; + --start; + } + debug_printf("iterating backward beyond first element\n"); + return a.e; +} + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter) +{ + struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)}; + return next; +} + +int 
cso_hash_iter_is_null(struct cso_hash_iter iter) +{ + if (!iter.node || iter.node == iter.hash->data.e) + return 1; + return 0; +} + +void * cso_hash_take(struct cso_hash *hash, + unsigned akey) +{ + struct cso_node **node = cso_hash_find_node(hash, akey); + if (*node != hash->data.e) { + void *t = (*node)->value; + struct cso_node *next = (*node)->next; + cso_free_node(*node); + *node = next; + --hash->data.d->size; + cso_data_has_shrunk(hash->data.d); + return t; + } + return 0; +} + +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter) +{ + struct cso_hash_iter prev = {iter.hash, + cso_hash_data_prev(iter.node)}; + return prev; +} + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) +{ + struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; + return iter; +} + +int cso_hash_size(struct cso_hash *hash) +{ + return hash->data.d->size; +} + +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter) +{ + struct cso_hash_iter ret = iter; + struct cso_node *node = iter.node; + struct cso_node **node_ptr; + + if (node == hash->data.e) + return iter; + + ret = cso_hash_iter_next(ret); + node_ptr = (struct cso_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]); + while (*node_ptr != node) + node_ptr = &(*node_ptr)->next; + *node_ptr = node->next; + cso_free_node(node); + --hash->data.d->size; + return ret; +} + +boolean cso_hash_contains(struct cso_hash *hash, unsigned key) +{ + struct cso_node **node = cso_hash_find_node(hash, key); + return (*node != hash->data.e); +} diff --git a/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.h b/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.h new file mode 100644 index 0000000000..5891c325fa --- /dev/null +++ b/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.h @@ -0,0 +1,129 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Hash table implementation. + * + * This file provides a hash implementation that is capable of dealing + * with collisions. It stores colliding entries in a linked list. All + * functions operating on the hash return an iterator. The iterator + * itself points to the collision list. If there wasn't any collision + * the list will have just one entry; otherwise client code should + * iterate over the entries to find the exact entry among the ones that + * had the same key (e.g. memcmp could be used on the data to check + * that).
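+ *
+ * A minimal usage sketch (illustrative only; a constant key is used here,
+ * whereas real callers derive the key from the data itself):
+ *
+ *    struct cso_hash *hash = cso_hash_create();
+ *    cso_hash_insert(hash, 42, item);
+ *    if (cso_hash_contains(hash, 42)) {
+ *       void *data = cso_hash_take(hash, 42);   // caller owns data again
+ *    }
+ *    cso_hash_delete(hash);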
+ *
+ * @author Zack Rusin + */ + +#ifndef CSO_HASH_H +#define CSO_HASH_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +struct cso_hash; +struct cso_node; + + +struct cso_hash_iter { + struct cso_hash *hash; + struct cso_node *node; +}; + + +struct cso_hash *cso_hash_create(void); +void cso_hash_delete(struct cso_hash *hash); + + +int cso_hash_size(struct cso_hash *hash); + + +/** + * Adds data with the given key to the hash. If an entry with the given + * key is already in the hash, the new entry is inserted before it + * in the collision list. + * The function returns an iterator pointing to the inserted item in the hash. + */ +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, + void *data); +/** + * Removes the item pointed to by the current iterator from the hash. + * Note that the data itself is not erased, and if it was a malloc'ed pointer + * it will have to be freed by the caller after calling this function. + * The function returns an iterator pointing to the item after the removed one in + * the hash. + */ +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter); + +void *cso_hash_take(struct cso_hash *hash, unsigned key); + + + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); + +/** + * Return an iterator pointing to the first entry in the collision list. + */ +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); + +/** + * Returns true if a value with the given key exists in the hash. + */ +boolean cso_hash_contains(struct cso_hash *hash, unsigned key); + + +int cso_hash_iter_is_null(struct cso_hash_iter iter); +unsigned cso_hash_iter_key(struct cso_hash_iter iter); +void *cso_hash_iter_data(struct cso_hash_iter iter); + + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + + +/** + * Convenience routine that iterates over the collision list while doing a memory + * comparison to see which entry in the list is a direct copy of our template, + * and returns that entry. + */ +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/video/Gallium/auxiliary/os/os_time.c b/drivers/video/Gallium/auxiliary/os/os_time.c index f7e4ca49c7..abcba63fe1 100644 --- a/drivers/video/Gallium/auxiliary/os/os_time.c +++ b/drivers/video/Gallium/auxiliary/os/os_time.c @@ -28,21 +28,15 @@ /** * @file * OS independent time-manipulation functions.
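 *
 * A minimal usage sketch (illustrative only; run_workload() is assumed):
 *
 *    int64_t t0 = os_time_get_nano();
 *    run_workload();
 *    int64_t elapsed_ns = os_time_get_nano() - t0;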
- * + * @author Jose Fonseca */ #include "pipe/p_config.h" -#if defined(PIPE_OS_UNIX) # include <time.h> /* timeval */ # include <sys/time.h> /* timeval */ -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) -# include <windows.h> -#else -# error Unsupported OS -#endif #include "os_time.h" @@ -50,32 +44,8 @@ int64_t os_time_get_nano(void) { -#if defined(PIPE_OS_LINUX) - - struct timespec tv; - clock_gettime(CLOCK_MONOTONIC, &tv); - return tv.tv_nsec + tv.tv_sec*INT64_C(1000000000); - -#elif defined(PIPE_OS_UNIX) - struct timeval tv; gettimeofday(&tv, NULL); - return tv.tv_usec*INT64_C(1000) + tv.tv_sec*INT64_C(1000000000); - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) - - static LARGE_INTEGER frequency; - LARGE_INTEGER counter; - if(!frequency.QuadPart) - QueryPerformanceFrequency(&frequency); - QueryPerformanceCounter(&counter); - return counter.QuadPart*INT64_C(1000000000)/frequency.QuadPart; - -#else - -#error Unsupported OS - -#endif } diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer.h b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer.h new file mode 100644 index 0000000000..a5ec93c255 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer.h @@ -0,0 +1,288 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Generic code for buffers. + * + * Behind a pipe buffer handle there can be DMA buffers, client (or user) + * buffers, regular malloced buffers, etc. This file provides an abstract base + * buffer handle that allows the driver to cope with all those kinds of buffers + * in a more flexible way. + * + * There is no obligation for a winsys driver to use this library, and a pipe + * driver should be completely agnostic about it.
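+ *
+ * A minimal usage sketch (illustrative only; the malloc-backed buffer
+ * declared at the end of this header is used because it needs no winsys
+ * behind it):
+ *
+ *    struct pb_desc desc;
+ *    struct pb_buffer *buf;
+ *    void *map;
+ *
+ *    desc.alignment = 64;
+ *    desc.usage = PB_USAGE_CPU_READ_WRITE;
+ *    buf = pb_malloc_buffer_create(4096, &desc);
+ *    map = pb_map(buf, PB_USAGE_CPU_WRITE, NULL);
+ *    memset(map, 0, 4096);
+ *    pb_unmap(buf);
+ *    pb_reference(&buf, NULL);   // drop the last reference, destroying it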
+ *
+ * \author Jose Fonseca + */ + +#ifndef PB_BUFFER_H_ +#define PB_BUFFER_H_ + + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "util/u_inlines.h" +#include "pipe/p_defines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_vtbl; +struct pb_validate; +struct pipe_fence_handle; + + +#define PB_USAGE_CPU_READ (1 << 0) +#define PB_USAGE_CPU_WRITE (1 << 1) +#define PB_USAGE_GPU_READ (1 << 2) +#define PB_USAGE_GPU_WRITE (1 << 3) +#define PB_USAGE_UNSYNCHRONIZED (1 << 10) +#define PB_USAGE_DONTBLOCK (1 << 9) + +#define PB_USAGE_CPU_READ_WRITE \ + ( PB_USAGE_CPU_READ | PB_USAGE_CPU_WRITE ) +#define PB_USAGE_GPU_READ_WRITE \ + ( PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE ) +#define PB_USAGE_WRITE \ + ( PB_USAGE_CPU_WRITE | PB_USAGE_GPU_WRITE ) + +/** + * Buffer description. + * + * Used when allocating the buffer. + */ +struct pb_desc +{ + unsigned alignment; + unsigned usage; +}; + + +/** + * Size. Regular (32-bit) unsigned for now. + */ +typedef unsigned pb_size; + + +/** + * Base class for all pb_* buffers. + */ +struct pb_buffer +{ + struct pipe_reference reference; + unsigned size; + unsigned alignment; + unsigned usage; + + /** + * Pointer to the virtual function table. + * + * Avoid accessing this table directly. Use the inline functions below + * instead to avoid mistakes. + */ + const struct pb_vtbl *vtbl; +}; + + +/** + * Virtual function table for the buffer storage operations. + * + * Note that creation is not done through this table. + */ +struct pb_vtbl +{ + void (*destroy)( struct pb_buffer *buf ); + + /** + * Map the entire data store of a buffer object into the client's address + * space. flags is a bitmask of PB_USAGE_CPU_READ/WRITE. + */ + void *(*map)( struct pb_buffer *buf, + unsigned flags, void *flush_ctx ); + + void (*unmap)( struct pb_buffer *buf ); + + enum pipe_error (*validate)( struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags ); + + void (*fence)( struct pb_buffer *buf, + struct pipe_fence_handle *fence ); + + /** + * Get the base buffer and the offset. + * + * A buffer can be subdivided into smaller buffers. This method should return + * the underlying buffer, and the relative offset. + * + * Buffers without an underlying base buffer should return themselves, with + * a zero offset. + * + * Note that this will increase the reference count of the base buffer.
+ */ + void (*get_base_buffer)( struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset ); + +}; + + + +/* Accessor functions for pb->vtbl: + */ +static INLINE void * +pb_map(struct pb_buffer *buf, + unsigned flags, void *flush_ctx) +{ + assert(buf); + if(!buf) + return NULL; + assert(pipe_is_referenced(&buf->reference)); + return buf->vtbl->map(buf, flags, flush_ctx); +} + + +static INLINE void +pb_unmap(struct pb_buffer *buf) +{ + assert(buf); + if(!buf) + return; + assert(pipe_is_referenced(&buf->reference)); + buf->vtbl->unmap(buf); +} + + +static INLINE void +pb_get_base_buffer( struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset ) +{ + assert(buf); + if(!buf) { + *base_buf = NULL; + *offset = 0; + return; + } + assert(pipe_is_referenced(&buf->reference)); + assert(buf->vtbl->get_base_buffer); + buf->vtbl->get_base_buffer(buf, base_buf, offset); + assert(*base_buf); + assert(*offset < (*base_buf)->size); +} + + +static INLINE enum pipe_error +pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + assert(buf->vtbl->validate); + return buf->vtbl->validate(buf, vl, flags); +} + + +static INLINE void +pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) +{ + assert(buf); + if(!buf) + return; + assert(buf->vtbl->fence); + buf->vtbl->fence(buf, fence); +} + + +static INLINE void +pb_destroy(struct pb_buffer *buf) +{ + assert(buf); + if(!buf) + return; + assert(!pipe_is_referenced(&buf->reference)); + buf->vtbl->destroy(buf); +} + +static INLINE void +pb_reference(struct pb_buffer **dst, + struct pb_buffer *src) +{ + struct pb_buffer *old = *dst; + + if (pipe_reference(&(*dst)->reference, &src->reference)) + pb_destroy( old ); + *dst = src; +} + + +/** + * Utility function to check whether the provided alignment is consistent with + * the requested one. + */ +static INLINE boolean +pb_check_alignment(pb_size requested, pb_size provided) +{ + if(!requested) + return TRUE; + if(requested > provided) + return FALSE; + if(provided % requested != 0) + return FALSE; + return TRUE; +} + + +/** + * Utility function to check whether the provided usage flags are consistent + * with the requested ones. + */ +static INLINE boolean +pb_check_usage(unsigned requested, unsigned provided) +{ + return (requested & provided) == requested ? TRUE : FALSE; +} + + +/** + * Malloc-based buffer to store data that can't be used by the graphics + * hardware. + */ +struct pb_buffer * +pb_malloc_buffer_create(pb_size size, + const struct pb_desc *desc); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_BUFFER_H_*/ diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.c new file mode 100644 index 0000000000..9e0cacecac --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -0,0 +1,1069 @@ +/************************************************************************** + * + * Copyright 2007-2010 VMware, Inc. + * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of fenced buffers. + * + * \author Jose Fonseca + * \author Thomas Hellström + */ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#include <unistd.h> +#include <sched.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "util/u_debug.h" +#include "os/os_thread.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" +#include "pb_bufmgr.h" + + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct fenced_manager +{ + struct pb_manager base; + struct pb_manager *provider; + struct pb_fence_ops *ops; + + /** + * Maximum buffer size that can be safely allocated. + */ + pb_size max_buffer_size; + + /** + * Maximum CPU memory we can allocate before we start waiting for the + * GPU to idle. + */ + pb_size max_cpu_total_size; + + /** + * Following members are mutable and protected by this mutex. + */ + pipe_mutex mutex; + + /** + * Fenced buffer list. + * + * All fenced buffers are placed in this list, ordered from the oldest + * fence to the newest fence. + */ + struct list_head fenced; + pb_size num_fenced; + + struct list_head unfenced; + pb_size num_unfenced; + + /** + * How much temporary CPU memory is being used to hold unvalidated buffers. + */ + pb_size cpu_total_size; +}; + + +/** + * Fenced buffer. + * + * Wrapper around a pipe buffer which adds fencing and reference counting. + */ +struct fenced_buffer +{ + /* + * Immutable members. + */ + + struct pb_buffer base; + struct fenced_manager *mgr; + + /* + * Following members are mutable and protected by fenced_manager::mutex. + */ + + struct list_head head; + + /** + * Buffer with storage. + */ + struct pb_buffer *buffer; + pb_size size; + struct pb_desc desc; + + /** + * Temporary CPU storage data. Used when there isn't enough GPU memory to + * store the buffer. + */ + void *data; + + /** + * A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage.
+ */ + unsigned flags; + + unsigned mapcount; + + struct pb_validate *vl; + unsigned validation_flags; + + struct pipe_fence_handle *fence; +}; + + +static INLINE struct fenced_manager * +fenced_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_manager *)mgr; +} + + +static INLINE struct fenced_buffer * +fenced_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct fenced_buffer *)buf; +} + + +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf); + +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait); + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf); + + +/** + * Dump the fenced buffer list. + * + * Useful to understand failures to allocate buffers. + */ +static void +fenced_manager_dump_locked(struct fenced_manager *fenced_mgr) +{ +#ifdef DEBUG + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + debug_printf("%10s %7s %8s %7s %10s %s\n", + "buffer", "size", "refcount", "storage", "fence", "signalled"); + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(!fenced_buf->fence); + debug_printf("%10p %7u %8u %7s\n", + (void *) fenced_buf, + fenced_buf->base.size, + p_atomic_read(&fenced_buf->base.reference.count), + fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none")); + curr = next; + next = curr->next; + } + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + int signaled; + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(fenced_buf->buffer); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + debug_printf("%10p %7u %8u %7s %10p %s\n", + (void *) fenced_buf, + fenced_buf->base.size, + p_atomic_read(&fenced_buf->base.reference.count), + "gpu", + (void *) fenced_buf->fence, + signaled == 0 ? "y" : "n"); + curr = next; + next = curr->next; + } +#else + (void)fenced_mgr; +#endif +} + + +static INLINE void +fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!pipe_is_referenced(&fenced_buf->base.reference)); + + assert(!fenced_buf->fence); + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + + FREE(fenced_buf); +} + + +/** + * Add the buffer to the fenced list. + * + * Reference count should be incremented before calling this function. 
+ */ +static INLINE void +fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE); + assert(fenced_buf->fence); + + p_atomic_inc(&fenced_buf->base.reference.count); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced); + ++fenced_mgr->num_fenced; +} + + +/** + * Remove the buffer from the fenced list, and potentially destroy the buffer + * if the reference count reaches zero. + * + * Returns TRUE if the buffer was destroyed. + */ +static INLINE boolean +fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + + assert(fenced_buf->fence); + assert(fenced_buf->mgr == fenced_mgr); + + ops->fence_reference(ops, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; + + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_fenced); + --fenced_mgr->num_fenced; + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + + if (p_atomic_dec_zero(&fenced_buf->base.reference.count)) { + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + return TRUE; + } + + return FALSE; +} + + +/** + * Wait for the fence to expire, and remove the buffer from the fenced list. + * + * This function will release and re-acquire the mutex, so any copy of mutable + * state must be discarded after calling it. + */ +static INLINE enum pipe_error +fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + enum pipe_error ret = PIPE_ERROR; + +#if 0 + debug_warning("waiting for GPU"); +#endif + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->fence); + + if(fenced_buf->fence) { + struct pipe_fence_handle *fence = NULL; + int finished; + boolean proceed; + + ops->fence_reference(ops, &fence, fenced_buf->fence); + + pipe_mutex_unlock(fenced_mgr->mutex); + + finished = ops->fence_finish(ops, fenced_buf->fence, 0); + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + + /* + * Only proceed if the fence object didn't change in the meanwhile. + * Otherwise assume the work has been already carried out by another + * thread that re-acquired the lock before us. + */ + proceed = fence == fenced_buf->fence ? TRUE : FALSE; + + ops->fence_reference(ops, &fence, NULL); + + if(proceed && finished == 0) { + /* + * Remove from the fenced list + */ + + boolean destroyed; + + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + /* TODO: remove subsequent buffers with the same fence? */ + + assert(!destroyed); + + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; + } + } + + return ret; +} + + +/** + * Remove as many fenced buffers from the fenced list as possible. + * + * Returns TRUE if at least one buffer was removed.
+ */ +static boolean +fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr, + boolean wait) +{ + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + struct pipe_fence_handle *prev_fence = NULL; + boolean ret = FALSE; + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + if(fenced_buf->fence != prev_fence) { + int signaled; + + if (wait) { + signaled = ops->fence_finish(ops, fenced_buf->fence, 0); + + /* + * Don't return just now. Instead preemptively check if the + * following buffers' fences already expired, without further waits. + */ + wait = FALSE; + } + else { + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + } + + if (signaled != 0) { + return ret; + } + + prev_fence = fenced_buf->fence; + } + else { + /* This buffer's fence object is identical to the previous buffer's + * fence object, so no need to check the fence again. + */ + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); + } + + fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + ret = TRUE; + + curr = next; + next = curr->next; + } + + return ret; +} + + +/** + * Try to free some GPU memory by backing it up into CPU memory. + * + * Returns TRUE if at least one buffer was freed. + */ +static boolean +fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr) +{ + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + /* + * We can only move storage if the buffer is not mapped and not + * validated. + */ + if(fenced_buf->buffer && + !fenced_buf->mapcount && + !fenced_buf->vl) { + enum pipe_error ret; + + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + if(ret == PIPE_OK) { + ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf); + if(ret == PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + return TRUE; + } + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + + curr = next; + next = curr->next; + } + + return FALSE; +} + + +/** + * Destroy CPU storage for this buffer. + */ +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->data) { + align_free(fenced_buf->data); + fenced_buf->data = NULL; + assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size); + fenced_buf->mgr->cpu_total_size -= fenced_buf->size; + } +} + + +/** + * Create CPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!fenced_buf->data); + if(fenced_buf->data) + return PIPE_OK; + + if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment); + if(!fenced_buf->data) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_mgr->cpu_total_size += fenced_buf->size; + + return PIPE_OK; +} + + +/** + * Destroy the GPU storage. + */ +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->buffer) { + pb_reference(&fenced_buf->buffer, NULL); + } +} + + +/** + * Try to create GPU storage for this buffer. 
+ * + * This function is a shorthand around pb_manager::create_buffer for + * fenced_buffer_create_gpu_storage_locked()'s benefit. + */ +static INLINE boolean +fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_manager *provider = fenced_mgr->provider; + + assert(!fenced_buf->buffer); + + fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, + fenced_buf->size, + &fenced_buf->desc); + return fenced_buf->buffer ? TRUE : FALSE; +} + + +/** + * Create GPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait) +{ + assert(!fenced_buf->buffer); + + /* + * Check for signaled buffers before trying to allocate. + */ + fenced_manager_check_signalled_locked(fenced_mgr, FALSE); + + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + + /* + * Keep trying while there is some sort of progress: + * - fences are expiring, + * - or buffers are being swapped out from GPU memory into CPU memory. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, FALSE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + + if(!fenced_buf->buffer && wait) { + /* + * Same as before, but this time around, wait to free buffers if + * necessary. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, TRUE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + } + + if(!fenced_buf->buffer) { + if(0) + fenced_manager_dump_locked(fenced_mgr); + + /* give up */ + return PIPE_ERROR_OUT_OF_MEMORY; + } + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) +{ + uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL); + if(!map) + return PIPE_ERROR; + + memcpy(map, fenced_buf->data, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) +{ + const uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL); + if(!map) + return PIPE_ERROR; + + memcpy(fenced_buf->data, map, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + assert(!pipe_is_referenced(&fenced_buf->base.reference)); + + pipe_mutex_lock(fenced_mgr->mutex); + + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static void * +fenced_buffer_map(struct pb_buffer *buf, + unsigned flags, void *flush_ctx) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + void *map = NULL; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(!(flags & PB_USAGE_GPU_READ_WRITE)); + + /* + * Serialize writes.
+ */ + while((fenced_buf->flags & PB_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PB_USAGE_GPU_READ) && + (flags & PB_USAGE_CPU_WRITE))) { + + /* + * Don't wait for the GPU to finish accessing it if blocking is forbidden. + */ + if((flags & PB_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) { + goto done; + } + + if (flags & PB_USAGE_UNSYNCHRONIZED) { + break; + } + + /* + * Wait for the GPU to finish accessing. This will release and re-acquire + * the mutex, so all copies of mutable state must be discarded. + */ + fenced_buffer_finish_locked(fenced_mgr, fenced_buf); + } + + if(fenced_buf->buffer) { + map = pb_map(fenced_buf->buffer, flags, flush_ctx); + } + else { + assert(fenced_buf->data); + map = fenced_buf->data; + } + + if(map) { + ++fenced_buf->mapcount; + fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE; + } + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return map; +} + + +static void +fenced_buffer_unmap(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(fenced_buf->mapcount); + if(fenced_buf->mapcount) { + if (fenced_buf->buffer) + pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + enum pipe_error ret; + + pipe_mutex_lock(fenced_mgr->mutex); + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + ret = PIPE_OK; + goto done; + } + + assert(flags & PB_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PB_USAGE_GPU_READ_WRITE)); + flags &= PB_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto done; + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + ret = PIPE_OK; + goto done; + } + + /* + * Create and update GPU storage.
+ */ + if(!fenced_buf->buffer) { + assert(!fenced_buf->mapcount); + + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + if(ret != PIPE_OK) { + goto done; + } + + ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf); + if(ret != PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + goto done; + } + + if(fenced_buf->mapcount) { + debug_printf("warning: validating a buffer while it is still mapped\n"); + } + else { + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + goto done; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return ret; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.reference)); + assert(fenced_buf->buffer); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + boolean destroyed; + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + assert(!destroyed); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + fenced_buffer_add_locked(fenced_mgr, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static void +fenced_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * This should only be called when the buffer is validated. Typically + * when processing relocations. + */ + assert(fenced_buf->vl); + assert(fenced_buf->buffer); + + if(fenced_buf->buffer) + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + else { + *base_buf = buf; + *offset = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); +} + + +static const struct pb_vtbl +fenced_buffer_vtbl = { + fenced_buffer_destroy, + fenced_buffer_map, + fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, + fenced_buffer_get_base_buffer +}; + + +/** + * Wrap a buffer in a fenced buffer. + */ +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + struct fenced_buffer *fenced_buf; + enum pipe_error ret; + + /* + * Don't stall the GPU, waste time evicting buffers, or waste memory + * trying to create a buffer that will most likely never fit into the + * graphics aperture. 
+ */ + if(size > fenced_mgr->max_buffer_size) { + goto no_buffer; + } + + fenced_buf = CALLOC_STRUCT(fenced_buffer); + if(!fenced_buf) + goto no_buffer; + + pipe_reference_init(&fenced_buf->base.reference, 1); + fenced_buf->base.alignment = desc->alignment; + fenced_buf->base.usage = desc->usage; + fenced_buf->base.size = size; + fenced_buf->size = size; + fenced_buf->desc = *desc; + + fenced_buf->base.vtbl = &fenced_buffer_vtbl; + fenced_buf->mgr = fenced_mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * Try to create GPU storage without stalling. + */ + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE); + + /* + * Attempt to use CPU memory to avoid stalling the GPU. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + } + + /* + * Create GPU storage, waiting for some to be available. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + } + + /* + * Give up. + */ + if(ret != PIPE_OK) { + goto no_storage; + } + + assert(fenced_buf->buffer || fenced_buf->data); + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + pipe_mutex_unlock(fenced_mgr->mutex); + + return &fenced_buf->base; + +no_storage: + pipe_mutex_unlock(fenced_mgr->mutex); + FREE(fenced_buf); +no_buffer: + return NULL; +} + + +static void +fenced_bufmgr_flush(struct pb_manager *mgr) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + pipe_mutex_unlock(fenced_mgr->mutex); + + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); +} + + +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) +{ + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); + + /* Wait on outstanding fences */ + while (fenced_mgr->num_fenced) { + pipe_mutex_unlock(fenced_mgr->mutex); +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) + sched_yield(); +#endif + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + } + +#ifdef DEBUG + /*assert(!fenced_mgr->num_unfenced);*/ +#endif + + pipe_mutex_unlock(fenced_mgr->mutex); + pipe_mutex_destroy(fenced_mgr->mutex); + + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); + + fenced_mgr->ops->destroy(fenced_mgr->ops); + + FREE(fenced_mgr); +} + + +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size) +{ + struct fenced_manager *fenced_mgr; + + if(!provider) + return NULL; + + fenced_mgr = CALLOC_STRUCT(fenced_manager); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + fenced_mgr->base.flush = fenced_bufmgr_flush; + + fenced_mgr->provider = provider; + fenced_mgr->ops = ops; + fenced_mgr->max_buffer_size = max_buffer_size; + fenced_mgr->max_cpu_total_size = max_cpu_total_size; + + LIST_INITHEAD(&fenced_mgr->fenced); + fenced_mgr->num_fenced = 0; + + LIST_INITHEAD(&fenced_mgr->unfenced); + fenced_mgr->num_unfenced = 0; + + pipe_mutex_init(fenced_mgr->mutex); + + return &fenced_mgr->base; +}
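+
+/*
+ * Construction sketch (illustrative only; "winsys_provider" and
+ * "winsys_fence_ops" stand in for whatever the winsys actually supplies):
+ *
+ *    struct pb_manager *fenced_mgr;
+ *    fenced_mgr = fenced_bufmgr_create(winsys_provider, winsys_fence_ops,
+ *                                      32 * 1024 * 1024,
+ *                                      16 * 1024 * 1024);
+ *
+ * Buffers created through fenced_mgr are then fenced with pb_fence() after
+ * validation, and their storage survives the last pb_reference() until the
+ * fence signals.
+ */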
diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.h new file mode 100644 index 0000000000..004c2b939a --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer fencing. + * + * "Fenced buffers" is actually a misnomer. They should be referred to as + * "fenceable buffers", i.e., buffers that can be fenced, but I couldn't find + * the word "fenceable" in the dictionary. + * + * A "fenced buffer" is a decorator around a normal buffer, which adds two + * special properties: + * - the ability for the destruction to be delayed by a fence; + * - reference counting. + * + * Usually DMA buffers have a life-time that will extend the life-time of + * their handle. The end of life is dictated by the fence signalling. + * + * Between the handle's destruction and the fence signalling, the buffer is + * stored in a fenced buffer list. + * + * \author Jose Fonseca + */ + +#ifndef PB_BUFFER_FENCED_H_ +#define PB_BUFFER_FENCED_H_ + + +#include "util/u_debug.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_fence_handle; + + +/** + * List of buffers which are awaiting fence signalling. + */ +struct fenced_buffer_list; + + +struct pb_fence_ops +{ + void (*destroy)( struct pb_fence_ops *ops ); + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pb_fence_ops *ops, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flag driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flag driver-specific meaning + * \return zero on success.
+ */ + int (*fence_finish)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); +}; + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_BUFFER_FENCED_H_*/ diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_malloc.c new file mode 100644 index 0000000000..b106a1a027 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -0,0 +1,198 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of malloc-based buffers to store data that can't be processed + * by the hardware. 
+ * + * \author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct malloc_buffer +{ + struct pb_buffer base; + void *data; +}; + + +extern const struct pb_vtbl malloc_buffer_vtbl; + +static INLINE struct malloc_buffer * +malloc_buffer(struct pb_buffer *buf) +{ + assert(buf); + if (!buf) + return NULL; + assert(buf->vtbl == &malloc_buffer_vtbl); + return (struct malloc_buffer *)buf; +} + + +static void +malloc_buffer_destroy(struct pb_buffer *buf) +{ + align_free(malloc_buffer(buf)->data); + FREE(buf); +} + + +static void * +malloc_buffer_map(struct pb_buffer *buf, + unsigned flags, + void *flush_ctx) +{ + return malloc_buffer(buf)->data; +} + + +static void +malloc_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static enum pipe_error +malloc_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +malloc_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + +static void +malloc_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +const struct pb_vtbl +malloc_buffer_vtbl = { + malloc_buffer_destroy, + malloc_buffer_map, + malloc_buffer_unmap, + malloc_buffer_validate, + malloc_buffer_fence, + malloc_buffer_get_base_buffer +}; + + +struct pb_buffer * +pb_malloc_buffer_create(pb_size size, + const struct pb_desc *desc) +{ + struct malloc_buffer *buf; + + /* TODO: do a single allocation */ + + buf = CALLOC_STRUCT(malloc_buffer); + if(!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.usage = desc->usage; + buf->base.size = size; + buf->base.alignment = desc->alignment; + buf->base.vtbl = &malloc_buffer_vtbl; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + return &buf->base; +} + + +static struct pb_buffer * +pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) +{ + return pb_malloc_buffer_create(size, desc); +} + + +static void +pb_malloc_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static void +pb_malloc_bufmgr_destroy(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static boolean +pb_malloc_bufmgr_is_buffer_busy( struct pb_manager *mgr, + struct pb_buffer *buf ) +{ + return FALSE; +} + + +static struct pb_manager +pb_malloc_bufmgr = { + pb_malloc_bufmgr_destroy, + pb_malloc_bufmgr_create_buffer, + pb_malloc_bufmgr_flush, + pb_malloc_bufmgr_is_buffer_busy +}; + + +struct pb_manager * +pb_malloc_bufmgr_create(void) +{ + return &pb_malloc_bufmgr; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr.h b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr.h new file mode 100644 index 0000000000..960068c494 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer management.
+ *
+ * A buffer manager does only one basic thing: it creates buffers. Actually,
+ * "buffer factory" would probably be a more accurate description.
+ *
+ * You can chain buffer managers so that you can have finer-grained memory
+ * management and pooling.
+ *
+ * For example, for a simple batch buffer manager you would chain:
+ * - the native buffer manager, which provides DMA memory from the graphics
+ *   memory space;
+ * - the pool buffer manager, which keeps around a pool of equally sized
+ *   buffers to avoid the latency associated with the native buffer manager;
+ * - the fenced buffer manager, which will delay buffer destruction until
+ *   the moment the card finishes processing it.
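+ *
+ * A minimal sketch of such a chain (the native manager and fence ops are
+ * placeholders here for whatever the winsys actually supplies; the buffer
+ * count, sizes and limits are purely illustrative):
+ *
+ * \code
+ * struct pb_manager *native = ...;   // winsys-provided (placeholder)
+ * struct pb_fence_ops *ops = ...;    // winsys-provided (placeholder)
+ * struct pb_manager *pooled, *fenced;
+ * struct pb_desc desc;
+ *
+ * memset(&desc, 0, sizeof(desc));
+ * desc.alignment = 4096;
+ *
+ * pooled = pool_bufmgr_create(native, 16, 256*1024, &desc);
+ * fenced = fenced_bufmgr_create(pooled, ops, 0, 0);  // limits illustrative
+ * \endcode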
+ *
+ * \author Jose Fonseca
+ */
+
+#ifndef PB_BUFMGR_H_
+#define PB_BUFMGR_H_
+
+
+#include "pb_buffer.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pb_desc;
+
+
+/**
+ * Abstract base class for all buffer managers.
+ */
+struct pb_manager
+{
+   void
+   (*destroy)( struct pb_manager *mgr );
+
+   struct pb_buffer *
+   (*create_buffer)( struct pb_manager *mgr,
+                     pb_size size,
+                     const struct pb_desc *desc);
+
+   /**
+    * Flush all temporarily held buffers.
+    *
+    * Used mostly to aid debugging memory issues or to clean up resources when
+    * the drivers are long lived.
+    */
+   void
+   (*flush)( struct pb_manager *mgr );
+
+   boolean
+   (*is_buffer_busy)( struct pb_manager *mgr,
+                      struct pb_buffer *buf );
+};
+
+
+/**
+ * Malloc buffer provider.
+ *
+ * Simple wrapper around pb_malloc_buffer_create for convenience.
+ */
+struct pb_manager *
+pb_malloc_bufmgr_create(void);
+
+
+/**
+ * Static buffer pool sub-allocator.
+ *
+ * Manages the allocation of equally sized buffers. It does so by allocating
+ * a single big buffer and dividing it into equally sized buffers.
+ *
+ * It is meant to manage the allocation of batch buffer pools.
+ */
+struct pb_manager *
+pool_bufmgr_create(struct pb_manager *provider,
+                   pb_size n, pb_size size,
+                   const struct pb_desc *desc);
+
+
+/**
+ * Static sub-allocator based on the old memory manager.
+ *
+ * It manages buffers of different sizes. It does so by allocating a buffer
+ * with the size of the heap, and then using the old mm memory manager to
+ * manage that heap.
+ */
+struct pb_manager *
+mm_bufmgr_create(struct pb_manager *provider,
+                 pb_size size, pb_size align2);
+
+/**
+ * Same as mm_bufmgr_create.
+ *
+ * The buffer will be released when the manager is destroyed.
+ */
+struct pb_manager *
+mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
+                             pb_size size, pb_size align2);
+
+
+/**
+ * Slab sub-allocator.
+ */
+struct pb_manager *
+pb_slab_manager_create(struct pb_manager *provider,
+                       pb_size bufSize,
+                       pb_size slabSize,
+                       const struct pb_desc *desc);
+
+/**
+ * Allows a range of buffer sizes, by aggregating multiple slab sub-allocators
+ * with different bucket sizes.
+ */
+struct pb_manager *
+pb_slab_range_manager_create(struct pb_manager *provider,
+                             pb_size minBufSize,
+                             pb_size maxBufSize,
+                             pb_size slabSize,
+                             const struct pb_desc *desc);
+
+
+/**
+ * Time-based buffer cache.
+ *
+ * This manager keeps a cache of recently destroyed buffers for a time
+ * interval, so that they can be reused.
+ */
+struct pb_manager *
+pb_cache_manager_create(struct pb_manager *provider,
+                        unsigned usecs);
+
+
+struct pb_fence_ops;
+
+/**
+ * Fenced buffer manager.
+ *
+ * This manager is just meant for convenience. It wraps the buffers returned
+ * by another manager in fenced buffers, so that buffer destruction is
+ * delayed until the hardware has finished processing them.
+ *
+ * NOTE: the buffer manager that provides the buffers will be destroyed
+ * at the same time.
+ */
+struct pb_manager *
+fenced_bufmgr_create(struct pb_manager *provider,
+                     struct pb_fence_ops *ops,
+                     pb_size max_buffer_size,
+                     pb_size max_cpu_total_size);
+
+
+struct pb_manager *
+pb_alt_manager_create(struct pb_manager *provider1,
+                      struct pb_manager *provider2);
+
+
+/**
+ * Ondemand buffer manager.
+ *
+ * Buffers are created in malloc'ed memory (fast and cached), and the contents
+ * are transferred to a buffer from the provider (typically in slow uncached
+ * memory) when there is an attempt to validate the buffer.
+ *
+ * Ideal for situations where one does not know beforehand whether a given
+ * buffer will effectively be used by the hardware or not.
+ */
+struct pb_manager *
+pb_ondemand_manager_create(struct pb_manager *provider);
+
+
+/**
+ * Debug buffer manager to detect buffer under- and overflows.
+ *
+ * Under/overflow sizes should be a multiple of the largest alignment.
+ */
+struct pb_manager *
+pb_debug_manager_create(struct pb_manager *provider,
+                        pb_size underflow_size, pb_size overflow_size);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*PB_BUFMGR_H_*/
diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
new file mode 100644
index 0000000000..f60c836f18
--- /dev/null
+++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -0,0 +1,120 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Allocate buffers from two alternative buffer providers. + * + * \author Jose Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_alt_manager +{ + struct pb_manager base; + + struct pb_manager *provider1; + struct pb_manager *provider2; +}; + + +static INLINE struct pb_alt_manager * +pb_alt_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_alt_manager *)mgr; +} + + +static struct pb_buffer * +pb_alt_manager_create_buffer(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + struct pb_buffer *buf; + + buf = mgr->provider1->create_buffer(mgr->provider1, size, desc); + if(buf) + return buf; + + buf = mgr->provider2->create_buffer(mgr->provider2, size, desc); + return buf; +} + + +static void +pb_alt_manager_flush(struct pb_manager *_mgr) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + + assert(mgr->provider1->flush); + if(mgr->provider1->flush) + mgr->provider1->flush(mgr->provider1); + + assert(mgr->provider2->flush); + if(mgr->provider2->flush) + mgr->provider2->flush(mgr->provider2); +} + + +static void +pb_alt_manager_destroy(struct pb_manager *mgr) +{ + FREE(mgr); +} + + +struct pb_manager * +pb_alt_manager_create(struct pb_manager *provider1, + struct pb_manager *provider2) +{ + struct pb_alt_manager *mgr; + + if(!provider1 || !provider2) + return NULL; + + mgr = CALLOC_STRUCT(pb_alt_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_alt_manager_destroy; + mgr->base.create_buffer = pb_alt_manager_create_buffer; + mgr->base.flush = pb_alt_manager_flush; + mgr->provider1 = provider1; + mgr->provider2 = provider2; + + return &mgr->base; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c new file mode 100644 index 0000000000..0e6896afd0 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -0,0 +1,411 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer cache. + * + * \author Jose Fonseca + * \author Thomas Hellström + */ + + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "os/os_thread.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pb_cache_manager; + + +/** + * Wrapper around a pipe buffer which adds delayed destruction. + */ +struct pb_cache_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + struct pb_cache_manager *mgr; + + /** Caching time interval */ + int64_t start, end; + + struct list_head head; +}; + + +struct pb_cache_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + unsigned usecs; + + pipe_mutex mutex; + + struct list_head delayed; + pb_size numDelayed; +}; + + +static INLINE struct pb_cache_buffer * +pb_cache_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_cache_buffer *)buf; +} + + +static INLINE struct pb_cache_manager * +pb_cache_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_cache_manager *)mgr; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_pb_cache_buffer_destroy(struct pb_cache_buffer *buf) +{ + struct pb_cache_manager *mgr = buf->mgr; + + LIST_DEL(&buf->head); + assert(mgr->numDelayed); + --mgr->numDelayed; + assert(!pipe_is_referenced(&buf->base.reference)); + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +/** + * Free as many cache buffers from the list head as possible. 
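+ *
+ * Buffers are appended to the delayed list in destruction order, so the
+ * list head always holds the oldest entry; the walk below stops at the
+ * first buffer whose [start, end) caching window has not yet expired.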
+ */
+static void
+_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
+{
+   struct list_head *curr, *next;
+   struct pb_cache_buffer *buf;
+   int64_t now;
+
+   now = os_time_get();
+
+   curr = mgr->delayed.next;
+   next = curr->next;
+   while(curr != &mgr->delayed) {
+      buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+
+      if(!os_time_timeout(buf->start, buf->end, now))
+         break;
+
+      _pb_cache_buffer_destroy(buf);
+
+      curr = next;
+      next = curr->next;
+   }
+}
+
+
+static void
+pb_cache_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   pipe_mutex_lock(mgr->mutex);
+   assert(!pipe_is_referenced(&buf->base.reference));
+
+   _pb_cache_buffer_list_check_free(mgr);
+
+   buf->start = os_time_get();
+   buf->end = buf->start + mgr->usecs;
+   LIST_ADDTAIL(&buf->head, &mgr->delayed);
+   ++mgr->numDelayed;
+   pipe_mutex_unlock(mgr->mutex);
+}
+
+
+static void *
+pb_cache_buffer_map(struct pb_buffer *_buf,
+                    unsigned flags, void *flush_ctx)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   return pb_map(buf->buffer, flags, flush_ctx);
+}
+
+
+static void
+pb_cache_buffer_unmap(struct pb_buffer *_buf)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   pb_unmap(buf->buffer);
+}
+
+
+static enum pipe_error
+pb_cache_buffer_validate(struct pb_buffer *_buf,
+                         struct pb_validate *vl,
+                         unsigned flags)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_cache_buffer_fence(struct pb_buffer *_buf,
+                      struct pipe_fence_handle *fence)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   pb_fence(buf->buffer, fence);
+}
+
+
+static void
+pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
+                                struct pb_buffer **base_buf,
+                                pb_size *offset)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_cache_buffer_vtbl = {
+      pb_cache_buffer_destroy,
+      pb_cache_buffer_map,
+      pb_cache_buffer_unmap,
+      pb_cache_buffer_validate,
+      pb_cache_buffer_fence,
+      pb_cache_buffer_get_base_buffer
+};
+
+
+static INLINE int
+pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
+                          pb_size size,
+                          const struct pb_desc *desc)
+{
+   if(buf->base.size < size)
+      return 0;
+
+   /* be lenient with size, but reject buffers more than twice as large
+    * as requested to limit waste */
+   if(buf->base.size >= 2*size)
+      return 0;
+
+   if(!pb_check_alignment(desc->alignment, buf->base.alignment))
+      return 0;
+
+   if(!pb_check_usage(desc->usage, buf->base.usage))
+      return 0;
+
+   if (buf->mgr->provider->is_buffer_busy) {
+      if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer))
+         return -1;
+   } else {
+      void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
+
+      if (!ptr)
+         return -1;
+
+      pb_unmap(buf->buffer);
+   }
+
+   return 1;
+}
+
+
+static struct pb_buffer *
+pb_cache_manager_create_buffer(struct pb_manager *_mgr,
+                               pb_size size,
+                               const struct pb_desc *desc)
+{
+   struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
+   struct pb_cache_buffer *buf;
+   struct pb_cache_buffer *curr_buf;
+   struct list_head *curr, *next;
+   int64_t now;
+   int ret = 0;
+
+   pipe_mutex_lock(mgr->mutex);
+
+   buf = NULL;
+   curr = mgr->delayed.next;
+   next = curr->next;
+
+   /* search in the expired buffers, freeing them in the process */
+   now = os_time_get();
+   while(curr != &mgr->delayed) {
+      curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+      if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc)) > 0)
+         buf = curr_buf;
+      else
if(os_time_timeout(curr_buf->start, curr_buf->end, now)) + _pb_cache_buffer_destroy(curr_buf); + else + /* This buffer (and all hereafter) are still hot in cache */ + break; + if (ret == -1) + break; + curr = next; + next = curr->next; + } + + /* keep searching in the hot buffers */ + if(!buf && ret != -1) { + while(curr != &mgr->delayed) { + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + ret = pb_cache_is_buffer_compat(curr_buf, size, desc); + if (ret > 0) { + buf = curr_buf; + break; + } + if (ret == -1) + break; + /* no need to check the timeout here */ + curr = next; + next = curr->next; + } + } + + if(buf) { + LIST_DEL(&buf->head); + --mgr->numDelayed; + pipe_mutex_unlock(mgr->mutex); + /* Increase refcount */ + pipe_reference_init(&buf->base.reference, 1); + return &buf->base; + } + + pipe_mutex_unlock(mgr->mutex); + + buf = CALLOC_STRUCT(pb_cache_buffer); + if(!buf) + return NULL; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); + + /* Empty the cache and try again. */ + if (!buf->buffer) { + mgr->base.flush(&mgr->base); + buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); + } + + if(!buf->buffer) { + FREE(buf); + return NULL; + } + + assert(pipe_is_referenced(&buf->buffer->reference)); + assert(pb_check_alignment(desc->alignment, buf->buffer->alignment)); + assert(pb_check_usage(desc->usage, buf->buffer->usage)); + assert(buf->buffer->size >= size); + + pipe_reference_init(&buf->base.reference, 1); + buf->base.alignment = buf->buffer->alignment; + buf->base.usage = buf->buffer->usage; + buf->base.size = buf->buffer->size; + + buf->base.vtbl = &pb_cache_buffer_vtbl; + buf->mgr = mgr; + + return &buf->base; +} + + +static void +pb_cache_manager_flush(struct pb_manager *_mgr) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + + pipe_mutex_lock(mgr->mutex); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + _pb_cache_buffer_destroy(buf); + curr = next; + next = curr->next; + } + pipe_mutex_unlock(mgr->mutex); + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + +static void +pb_cache_manager_destroy(struct pb_manager *mgr) +{ + pb_cache_manager_flush(mgr); + FREE(mgr); +} + + +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs) +{ + struct pb_cache_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_cache_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_cache_manager_destroy; + mgr->base.create_buffer = pb_cache_manager_create_buffer; + mgr->base.flush = pb_cache_manager_flush; + mgr->provider = provider; + mgr->usecs = usecs; + LIST_INITHEAD(&mgr->delayed); + mgr->numDelayed = 0; + pipe_mutex_init(mgr->mutex); + + return &mgr->base; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c new file mode 100644 index 0000000000..567303aa55 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -0,0 +1,497 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Debug buffer manager to detect buffer under- and overflows.
+ *
+ * \author Jose Fonseca
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "os/os_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+#include "util/u_debug_stack.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+#ifdef DEBUG
+
+
+#define PB_DEBUG_CREATE_BACKTRACE 8
+#define PB_DEBUG_MAP_BACKTRACE 8
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pb_debug_manager;
+
+
+/**
+ * Wrapper around a pipe buffer which adds under- and overflow checking.
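+ *
+ * The underlying allocation is laid out as follows (sketch inferred from
+ * the code below):
+ *
+ * \code
+ *   | underflow pad | user data (base.size) | overflow pad |
+ * \endcode
+ *
+ * Both pads are filled with a fixed pseudo-random byte pattern on creation
+ * and re-verified on map, unmap, validate and destroy.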
+ */
+struct pb_debug_buffer
+{
+   struct pb_buffer base;
+
+   struct pb_buffer *buffer;
+   struct pb_debug_manager *mgr;
+
+   pb_size underflow_size;
+   pb_size overflow_size;
+
+   struct debug_stack_frame create_backtrace[PB_DEBUG_CREATE_BACKTRACE];
+
+   pipe_mutex mutex;
+   unsigned map_count;
+   struct debug_stack_frame map_backtrace[PB_DEBUG_MAP_BACKTRACE];
+
+   struct list_head head;
+};
+
+
+struct pb_debug_manager
+{
+   struct pb_manager base;
+
+   struct pb_manager *provider;
+
+   pb_size underflow_size;
+   pb_size overflow_size;
+
+   pipe_mutex mutex;
+   struct list_head list;
+};
+
+
+static INLINE struct pb_debug_buffer *
+pb_debug_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct pb_debug_buffer *)buf;
+}
+
+
+static INLINE struct pb_debug_manager *
+pb_debug_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pb_debug_manager *)mgr;
+}
+
+
+static const uint8_t random_pattern[32] = {
+   0xaf, 0xcf, 0xa5, 0xa2, 0xc2, 0x63, 0x15, 0x1a,
+   0x7e, 0xe2, 0x7e, 0x84, 0x15, 0x49, 0xa2, 0x1e,
+   0x49, 0x63, 0xf5, 0x52, 0x74, 0x66, 0x9e, 0xc4,
+   0x6d, 0xcf, 0x2c, 0x4a, 0x74, 0xe6, 0xfd, 0x94
+};
+
+
+static INLINE void
+fill_random_pattern(uint8_t *dst, pb_size size)
+{
+   pb_size i = 0;
+   while(size--) {
+      *dst++ = random_pattern[i++];
+      i &= sizeof(random_pattern) - 1;
+   }
+}
+
+
+static INLINE boolean
+check_random_pattern(const uint8_t *dst, pb_size size,
+                     pb_size *min_ofs, pb_size *max_ofs)
+{
+   boolean result = TRUE;
+   pb_size i;
+   *min_ofs = size;
+   *max_ofs = 0;
+   for(i = 0; i < size; ++i) {
+      if(*dst++ != random_pattern[i % sizeof(random_pattern)]) {
+         *min_ofs = MIN2(*min_ofs, i);
+         *max_ofs = MAX2(*max_ofs, i);
+         result = FALSE;
+      }
+   }
+   return result;
+}
+
+
+static void
+pb_debug_buffer_fill(struct pb_debug_buffer *buf)
+{
+   uint8_t *map;
+
+   map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
+   assert(map);
+   if(map) {
+      fill_random_pattern(map, buf->underflow_size);
+      fill_random_pattern(map + buf->underflow_size + buf->base.size,
+                          buf->overflow_size);
+      pb_unmap(buf->buffer);
+   }
+}
+
+
+/**
+ * Check for under/overflows.
+ *
+ * Should be called with the buffer unmapped.
+ */
+static void
+pb_debug_buffer_check(struct pb_debug_buffer *buf)
+{
+   uint8_t *map;
+
+   map = pb_map(buf->buffer,
+                PB_USAGE_CPU_READ |
+                PB_USAGE_UNSYNCHRONIZED, NULL);
+   assert(map);
+   if(map) {
+      boolean underflow, overflow;
+      pb_size min_ofs, max_ofs;
+
+      underflow = !check_random_pattern(map, buf->underflow_size,
+                                        &min_ofs, &max_ofs);
+      if(underflow) {
+         debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
+                      buf->underflow_size - min_ofs,
+                      min_ofs == 0 ? "+" : "",
+                      buf->underflow_size - max_ofs);
+      }
+
+      overflow = !check_random_pattern(map + buf->underflow_size + buf->base.size,
+                                       buf->overflow_size,
+                                       &min_ofs, &max_ofs);
+      if(overflow) {
+         debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
+                      buf->base.size,
+                      min_ofs,
+                      max_ofs,
+                      max_ofs == buf->overflow_size - 1 ?
"+" : ""); + } + + if(underflow || overflow) + debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE); + + debug_assert(!underflow); + debug_assert(!overflow); + + /* re-fill if not aborted */ + if(underflow) + fill_random_pattern(map, buf->underflow_size); + if(overflow) + fill_random_pattern(map + buf->underflow_size + buf->base.size, + buf->overflow_size); + + pb_unmap(buf->buffer); + } +} + + +static void +pb_debug_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + struct pb_debug_manager *mgr = buf->mgr; + + assert(!pipe_is_referenced(&buf->base.reference)); + + pb_debug_buffer_check(buf); + + pipe_mutex_lock(mgr->mutex); + LIST_DEL(&buf->head); + pipe_mutex_unlock(mgr->mutex); + + pipe_mutex_destroy(buf->mutex); + + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +static void * +pb_debug_buffer_map(struct pb_buffer *_buf, + unsigned flags, void *flush_ctx) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + void *map; + + pb_debug_buffer_check(buf); + + map = pb_map(buf->buffer, flags, flush_ctx); + if(!map) + return NULL; + + if(map) { + pipe_mutex_lock(buf->mutex); + ++buf->map_count; + debug_backtrace_capture(buf->map_backtrace, 1, PB_DEBUG_MAP_BACKTRACE); + pipe_mutex_unlock(buf->mutex); + } + + return (uint8_t *)map + buf->underflow_size; +} + + +static void +pb_debug_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pipe_mutex_lock(buf->mutex); + assert(buf->map_count); + if(buf->map_count) + --buf->map_count; + pipe_mutex_unlock(buf->mutex); + + pb_unmap(buf->buffer); + + pb_debug_buffer_check(buf); +} + + +static void +pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_get_base_buffer(buf->buffer, base_buf, offset); + *offset += buf->underflow_size; +} + + +static enum pipe_error +pb_debug_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pipe_mutex_lock(buf->mutex); + if(buf->map_count) { + debug_printf("%s: attempting to validate a mapped buffer\n", __FUNCTION__); + debug_printf("last map backtrace is\n"); + debug_backtrace_dump(buf->map_backtrace, PB_DEBUG_MAP_BACKTRACE); + } + pipe_mutex_unlock(buf->mutex); + + pb_debug_buffer_check(buf); + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_debug_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + +const struct pb_vtbl +pb_debug_buffer_vtbl = { + pb_debug_buffer_destroy, + pb_debug_buffer_map, + pb_debug_buffer_unmap, + pb_debug_buffer_validate, + pb_debug_buffer_fence, + pb_debug_buffer_get_base_buffer +}; + + +static void +pb_debug_manager_dump_locked(struct pb_debug_manager *mgr) +{ + struct list_head *curr, *next; + struct pb_debug_buffer *buf; + + curr = mgr->list.next; + next = curr->next; + while(curr != &mgr->list) { + buf = LIST_ENTRY(struct pb_debug_buffer, curr, head); + + debug_printf("buffer = %p\n", (void *) buf); + debug_printf(" .size = 0x%x\n", buf->base.size); + debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE); + + curr = next; + next = curr->next; + } + +} + + +static struct pb_buffer * +pb_debug_manager_create_buffer(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct 
pb_debug_manager *mgr = pb_debug_manager(_mgr); + struct pb_debug_buffer *buf; + struct pb_desc real_desc; + pb_size real_size; + + assert(size); + assert(desc->alignment); + + buf = CALLOC_STRUCT(pb_debug_buffer); + if(!buf) + return NULL; + + real_size = mgr->underflow_size + size + mgr->overflow_size; + real_desc = *desc; + real_desc.usage |= PB_USAGE_CPU_WRITE; + real_desc.usage |= PB_USAGE_CPU_READ; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, + real_size, + &real_desc); + if(!buf->buffer) { + FREE(buf); +#if 0 + pipe_mutex_lock(mgr->mutex); + debug_printf("%s: failed to create buffer\n", __FUNCTION__); + if(!LIST_IS_EMPTY(&mgr->list)) + pb_debug_manager_dump_locked(mgr); + pipe_mutex_unlock(mgr->mutex); +#endif + return NULL; + } + + assert(pipe_is_referenced(&buf->buffer->reference)); + assert(pb_check_alignment(real_desc.alignment, buf->buffer->alignment)); + assert(pb_check_usage(real_desc.usage, buf->buffer->usage)); + assert(buf->buffer->size >= real_size); + + pipe_reference_init(&buf->base.reference, 1); + buf->base.alignment = desc->alignment; + buf->base.usage = desc->usage; + buf->base.size = size; + + buf->base.vtbl = &pb_debug_buffer_vtbl; + buf->mgr = mgr; + + buf->underflow_size = mgr->underflow_size; + buf->overflow_size = buf->buffer->size - buf->underflow_size - size; + + debug_backtrace_capture(buf->create_backtrace, 1, PB_DEBUG_CREATE_BACKTRACE); + + pb_debug_buffer_fill(buf); + + pipe_mutex_init(buf->mutex); + + pipe_mutex_lock(mgr->mutex); + LIST_ADDTAIL(&buf->head, &mgr->list); + pipe_mutex_unlock(mgr->mutex); + + return &buf->base; +} + + +static void +pb_debug_manager_flush(struct pb_manager *_mgr) +{ + struct pb_debug_manager *mgr = pb_debug_manager(_mgr); + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + +static void +pb_debug_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_debug_manager *mgr = pb_debug_manager(_mgr); + + pipe_mutex_lock(mgr->mutex); + if(!LIST_IS_EMPTY(&mgr->list)) { + debug_printf("%s: unfreed buffers\n", __FUNCTION__); + pb_debug_manager_dump_locked(mgr); + } + pipe_mutex_unlock(mgr->mutex); + + pipe_mutex_destroy(mgr->mutex); + mgr->provider->destroy(mgr->provider); + FREE(mgr); +} + + +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, + pb_size underflow_size, pb_size overflow_size) +{ + struct pb_debug_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_debug_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_debug_manager_destroy; + mgr->base.create_buffer = pb_debug_manager_create_buffer; + mgr->base.flush = pb_debug_manager_flush; + mgr->provider = provider; + mgr->underflow_size = underflow_size; + mgr->overflow_size = overflow_size; + + pipe_mutex_init(mgr->mutex); + LIST_INITHEAD(&mgr->list); + + return &mgr->base; +} + + +#else /* !DEBUG */ + + +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, + pb_size underflow_size, pb_size overflow_size) +{ + return provider; +} + + +#endif /* !DEBUG */ diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c new file mode 100644 index 0000000000..453cf45b86 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -0,0 +1,320 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer manager using the old texture memory manager. + * + * \author Jose Fonseca + */ + + +#include "pipe/p_defines.h" +#include "util/u_debug.h" +#include "os/os_thread.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" +#include "util/u_mm.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct mm_pb_manager +{ + struct pb_manager base; + + pipe_mutex mutex; + + pb_size size; + struct mem_block *heap; + + pb_size align2; + + struct pb_buffer *buffer; + void *map; +}; + + +static INLINE struct mm_pb_manager * +mm_pb_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct mm_pb_manager *)mgr; +} + + +struct mm_buffer +{ + struct pb_buffer base; + + struct mm_pb_manager *mgr; + + struct mem_block *block; +}; + + +static INLINE struct mm_buffer * +mm_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct mm_buffer *)buf; +} + + +static void +mm_buffer_destroy(struct pb_buffer *buf) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + + assert(!pipe_is_referenced(&mm_buf->base.reference)); + + pipe_mutex_lock(mm->mutex); + u_mmFreeMem(mm_buf->block); + FREE(mm_buf); + pipe_mutex_unlock(mm->mutex); +} + + +static void * +mm_buffer_map(struct pb_buffer *buf, + unsigned flags, + void *flush_ctx) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + + /* XXX: it will be necessary to remap here to propagate flush_ctx */ + + return (unsigned char *) mm->map + mm_buf->block->ofs; +} + + +static void +mm_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static enum pipe_error +mm_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + return pb_validate(mm->buffer, vl, flags); +} + + +static void +mm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_fence(mm->buffer, fence); +} + + +static void +mm_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + struct mm_buffer *mm_buf = 
mm_buffer(buf);
+   struct mm_pb_manager *mm = mm_buf->mgr;
+   pb_get_base_buffer(mm->buffer, base_buf, offset);
+   *offset += mm_buf->block->ofs;
+}
+
+
+static const struct pb_vtbl
+mm_buffer_vtbl = {
+      mm_buffer_destroy,
+      mm_buffer_map,
+      mm_buffer_unmap,
+      mm_buffer_validate,
+      mm_buffer_fence,
+      mm_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+mm_bufmgr_create_buffer(struct pb_manager *mgr,
+                        pb_size size,
+                        const struct pb_desc *desc)
+{
+   struct mm_pb_manager *mm = mm_pb_manager(mgr);
+   struct mm_buffer *mm_buf;
+
+   /* We don't handle alignments larger than the one initially set up */
+   assert(pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2));
+   if(!pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2))
+      return NULL;
+
+   pipe_mutex_lock(mm->mutex);
+
+   mm_buf = CALLOC_STRUCT(mm_buffer);
+   if (!mm_buf) {
+      pipe_mutex_unlock(mm->mutex);
+      return NULL;
+   }
+
+   pipe_reference_init(&mm_buf->base.reference, 1);
+   mm_buf->base.alignment = desc->alignment;
+   mm_buf->base.usage = desc->usage;
+   mm_buf->base.size = size;
+
+   mm_buf->base.vtbl = &mm_buffer_vtbl;
+
+   mm_buf->mgr = mm;
+
+   mm_buf->block = u_mmAllocMem(mm->heap, (int)size, (int)mm->align2, 0);
+   if(!mm_buf->block) {
+#if 0
+      debug_printf("warning: heap full\n");
+      mmDumpMemInfo(mm->heap);
+#endif
+      FREE(mm_buf);
+      pipe_mutex_unlock(mm->mutex);
+      return NULL;
+   }
+
+   /* Some sanity checks */
+   assert(0 <= (pb_size)mm_buf->block->ofs && (pb_size)mm_buf->block->ofs < mm->size);
+   assert(size <= (pb_size)mm_buf->block->size && (pb_size)mm_buf->block->ofs + (pb_size)mm_buf->block->size <= mm->size);
+
+   pipe_mutex_unlock(mm->mutex);
+   return SUPER(mm_buf);
+}
+
+
+static void
+mm_bufmgr_flush(struct pb_manager *mgr)
+{
+   /* No-op */
+}
+
+
+static void
+mm_bufmgr_destroy(struct pb_manager *mgr)
+{
+   struct mm_pb_manager *mm = mm_pb_manager(mgr);
+
+   pipe_mutex_lock(mm->mutex);
+
+   u_mmDestroy(mm->heap);
+
+   pb_unmap(mm->buffer);
+   pb_reference(&mm->buffer, NULL);
+
+   pipe_mutex_unlock(mm->mutex);
+
+   FREE(mgr);
+}
+
+
+struct pb_manager *
+mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
+                             pb_size size, pb_size align2)
+{
+   struct mm_pb_manager *mm;
+
+   if(!buffer)
+      return NULL;
+
+   mm = CALLOC_STRUCT(mm_pb_manager);
+   if (!mm)
+      return NULL;
+
+   mm->base.destroy = mm_bufmgr_destroy;
+   mm->base.create_buffer = mm_bufmgr_create_buffer;
+   mm->base.flush = mm_bufmgr_flush;
+
+   mm->size = size;
+   mm->align2 = align2;   /* log2 of the byte alignment, e.g. 6 for 64 bytes */
+
+   pipe_mutex_init(mm->mutex);
+
+   mm->buffer = buffer;
+
+   mm->map = pb_map(mm->buffer,
+                    PB_USAGE_CPU_READ |
+                    PB_USAGE_CPU_WRITE, NULL);
+   if(!mm->map)
+      goto failure;
+
+   mm->heap = u_mmInit(0, (int)size);
+   if (!mm->heap)
+      goto failure;
+
+   return SUPER(mm);
+
+failure:
+   if(mm->heap)
+      u_mmDestroy(mm->heap);
+   if(mm->map)
+      pb_unmap(mm->buffer);
+   FREE(mm);
+   return NULL;
+}
+
+
+struct pb_manager *
+mm_bufmgr_create(struct pb_manager *provider,
+                 pb_size size, pb_size align2)
+{
+   struct pb_buffer *buffer;
+   struct pb_manager *mgr;
+   struct pb_desc desc;
+
+   if(!provider)
+      return NULL;
+
+   memset(&desc, 0, sizeof(desc));
+   desc.alignment = 1 << align2;
+
+   buffer = provider->create_buffer(provider, size, &desc);
+   if (!buffer)
+      return NULL;
+
+   mgr = mm_bufmgr_create_from_buffer(buffer, size, align2);
+   if (!mgr) {
+      pb_reference(&buffer, NULL);
+      return NULL;
+   }
+
+   return mgr;
+}
diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
new file mode 100644
index
0000000000..e44bc9b17e --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -0,0 +1,305 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A variation of malloc buffers which get transferred to real graphics memory + * when there is an attempt to validate them. + * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_ondemand_manager; + + +struct pb_ondemand_buffer +{ + struct pb_buffer base; + + struct pb_ondemand_manager *mgr; + + /** Regular malloc'ed memory */ + void *data; + unsigned mapcount; + + /** Real buffer */ + struct pb_buffer *buffer; + pb_size size; + struct pb_desc desc; +}; + + +struct pb_ondemand_manager +{ + struct pb_manager base; + + struct pb_manager *provider; +}; + + +extern const struct pb_vtbl pb_ondemand_buffer_vtbl; + +static INLINE struct pb_ondemand_buffer * +pb_ondemand_buffer(struct pb_buffer *buf) +{ + assert(buf); + if (!buf) + return NULL; + assert(buf->vtbl == &pb_ondemand_buffer_vtbl); + return (struct pb_ondemand_buffer *)buf; +} + +static INLINE struct pb_ondemand_manager * +pb_ondemand_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_ondemand_manager *)mgr; +} + + +static void +pb_ondemand_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + pb_reference(&buf->buffer, NULL); + + align_free(buf->data); + + FREE(buf); +} + + +static void * +pb_ondemand_buffer_map(struct pb_buffer *_buf, + unsigned flags, void *flush_ctx) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + return pb_map(buf->buffer, flags, flush_ctx); + } + else { + assert(buf->data); + ++buf->mapcount; + return buf->data; + } +} + + +static void +pb_ondemand_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + pb_unmap(buf->buffer); + } + else { + assert(buf->data); + assert(buf->mapcount); + if(buf->mapcount) + --buf->mapcount; + } +} + + +static enum pipe_error 
+pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
+{
+   if(!buf->buffer) {
+      struct pb_manager *provider = buf->mgr->provider;
+      uint8_t *map;
+
+      assert(!buf->mapcount);
+
+      buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc);
+      if(!buf->buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      /* map for writing -- we are about to copy the malloc'ed contents in */
+      map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
+      if(!map) {
+         pb_reference(&buf->buffer, NULL);
+         return PIPE_ERROR;
+      }
+
+      memcpy(map, buf->data, buf->size);
+
+      pb_unmap(buf->buffer);
+
+      if(!buf->mapcount) {
+         align_free(buf->data);
+         buf->data = NULL;
+      }
+   }
+
+   return PIPE_OK;
+}
+
+static enum pipe_error
+pb_ondemand_buffer_validate(struct pb_buffer *_buf,
+                            struct pb_validate *vl,
+                            unsigned flags)
+{
+   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+   enum pipe_error ret;
+
+   assert(!buf->mapcount);
+   if(buf->mapcount)
+      return PIPE_ERROR;
+
+   ret = pb_ondemand_buffer_instantiate(buf);
+   if(ret != PIPE_OK)
+      return ret;
+
+   return pb_validate(buf->buffer, vl, flags);
+}
+
+
+static void
+pb_ondemand_buffer_fence(struct pb_buffer *_buf,
+                         struct pipe_fence_handle *fence)
+{
+   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+   assert(buf->buffer);
+   if(!buf->buffer)
+      return;
+
+   pb_fence(buf->buffer, fence);
+}
+
+
+static void
+pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
+                                   struct pb_buffer **base_buf,
+                                   pb_size *offset)
+{
+   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
+
+   if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) {
+      assert(0);
+      *base_buf = &buf->base;
+      *offset = 0;
+      return;
+   }
+
+   pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_ondemand_buffer_vtbl = {
+      pb_ondemand_buffer_destroy,
+      pb_ondemand_buffer_map,
+      pb_ondemand_buffer_unmap,
+      pb_ondemand_buffer_validate,
+      pb_ondemand_buffer_fence,
+      pb_ondemand_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
+                                  pb_size size,
+                                  const struct pb_desc *desc)
+{
+   struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
+   struct pb_ondemand_buffer *buf;
+
+   buf = CALLOC_STRUCT(pb_ondemand_buffer);
+   if(!buf)
+      return NULL;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.alignment = desc->alignment;
+   buf->base.usage = desc->usage;
+   buf->base.size = size;
+   buf->base.vtbl = &pb_ondemand_buffer_vtbl;
+
+   buf->mgr = mgr;
+
+   buf->data = align_malloc(size, desc->alignment < sizeof(void*) ?
sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + buf->size = size; + buf->desc = *desc; + + return &buf->base; +} + + +static void +pb_ondemand_manager_flush(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + mgr->provider->flush(mgr->provider); +} + + +static void +pb_ondemand_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + FREE(mgr); +} + + +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider) +{ + struct pb_ondemand_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_ondemand_manager); + if(!mgr) + return NULL; + + mgr->base.destroy = pb_ondemand_manager_destroy; + mgr->base.create_buffer = pb_ondemand_manager_create_buffer; + mgr->base.flush = pb_ondemand_manager_flush; + + mgr->provider = provider; + + return &mgr->base; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c new file mode 100644 index 0000000000..67a19fe8a6 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -0,0 +1,321 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * \file + * Batch buffer pool management. + * + * \author Jose Fonseca + * \author Thomas Hellström + */ + + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "os/os_thread.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). 
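+ *
+ * SUPER(obj) upcasts a derived object (e.g. a pool buffer or the pool
+ * manager itself) to its embedded pb_* base by taking the address of its
+ * first member, avoiding an unchecked pointer cast.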
+ */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pool_pb_manager +{ + struct pb_manager base; + + pipe_mutex mutex; + + pb_size bufSize; + pb_size bufAlign; + + pb_size numFree; + pb_size numTot; + + struct list_head free; + + struct pb_buffer *buffer; + void *map; + + struct pool_buffer *bufs; +}; + + +static INLINE struct pool_pb_manager * +pool_pb_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pool_pb_manager *)mgr; +} + + +struct pool_buffer +{ + struct pb_buffer base; + + struct pool_pb_manager *mgr; + + struct list_head head; + + pb_size start; +}; + + +static INLINE struct pool_buffer * +pool_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pool_buffer *)buf; +} + + + +static void +pool_buffer_destroy(struct pb_buffer *buf) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + + assert(!pipe_is_referenced(&pool_buf->base.reference)); + + pipe_mutex_lock(pool->mutex); + LIST_ADD(&pool_buf->head, &pool->free); + pool->numFree++; + pipe_mutex_unlock(pool->mutex); +} + + +static void * +pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + void *map; + + /* XXX: it will be necessary to remap here to propagate flush_ctx */ + + pipe_mutex_lock(pool->mutex); + map = (unsigned char *) pool->map + pool_buf->start; + pipe_mutex_unlock(pool->mutex); + return map; +} + + +static void +pool_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static enum pipe_error +pool_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + return pb_validate(pool->buffer, vl, flags); +} + + +static void +pool_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_fence(pool->buffer, fence); +} + + +static void +pool_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_get_base_buffer(pool->buffer, base_buf, offset); + *offset += pool_buf->start; +} + + +static const struct pb_vtbl +pool_buffer_vtbl = { + pool_buffer_destroy, + pool_buffer_map, + pool_buffer_unmap, + pool_buffer_validate, + pool_buffer_fence, + pool_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pool_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct pool_pb_manager *pool = pool_pb_manager(mgr); + struct pool_buffer *pool_buf; + struct list_head *item; + + assert(size == pool->bufSize); + assert(pool->bufAlign % desc->alignment == 0); + + pipe_mutex_lock(pool->mutex); + + if (pool->numFree == 0) { + pipe_mutex_unlock(pool->mutex); + debug_printf("warning: out of fixed size buffer objects\n"); + return NULL; + } + + item = pool->free.next; + + if (item == &pool->free) { + pipe_mutex_unlock(pool->mutex); + debug_printf("error: fixed size buffer pool corruption\n"); + return NULL; + } + + LIST_DEL(item); + --pool->numFree; + + pipe_mutex_unlock(pool->mutex); + + pool_buf = LIST_ENTRY(struct pool_buffer, item, head); + assert(!pipe_is_referenced(&pool_buf->base.reference)); + pipe_reference_init(&pool_buf->base.reference, 1); + pool_buf->base.alignment = 
desc->alignment; + pool_buf->base.usage = desc->usage; + + return SUPER(pool_buf); +} + + +static void +pool_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static void +pool_bufmgr_destroy(struct pb_manager *mgr) +{ + struct pool_pb_manager *pool = pool_pb_manager(mgr); + pipe_mutex_lock(pool->mutex); + + FREE(pool->bufs); + + pb_unmap(pool->buffer); + pb_reference(&pool->buffer, NULL); + + pipe_mutex_unlock(pool->mutex); + + FREE(mgr); +} + + +struct pb_manager * +pool_bufmgr_create(struct pb_manager *provider, + pb_size numBufs, + pb_size bufSize, + const struct pb_desc *desc) +{ + struct pool_pb_manager *pool; + struct pool_buffer *pool_buf; + pb_size i; + + if(!provider) + return NULL; + + pool = CALLOC_STRUCT(pool_pb_manager); + if (!pool) + return NULL; + + pool->base.destroy = pool_bufmgr_destroy; + pool->base.create_buffer = pool_bufmgr_create_buffer; + pool->base.flush = pool_bufmgr_flush; + + LIST_INITHEAD(&pool->free); + + pool->numTot = numBufs; + pool->numFree = numBufs; + pool->bufSize = bufSize; + pool->bufAlign = desc->alignment; + + pipe_mutex_init(pool->mutex); + + pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc); + if (!pool->buffer) + goto failure; + + pool->map = pb_map(pool->buffer, + PB_USAGE_CPU_READ | + PB_USAGE_CPU_WRITE, NULL); + if(!pool->map) + goto failure; + + pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs)); + if (!pool->bufs) + goto failure; + + pool_buf = pool->bufs; + for (i = 0; i < numBufs; ++i) { + pipe_reference_init(&pool_buf->base.reference, 0); + pool_buf->base.alignment = 0; + pool_buf->base.usage = 0; + pool_buf->base.size = bufSize; + pool_buf->base.vtbl = &pool_buffer_vtbl; + pool_buf->mgr = pool; + pool_buf->start = i * bufSize; + LIST_ADDTAIL(&pool_buf->head, &pool->free); + pool_buf++; + } + + return SUPER(pool); + +failure: + FREE(pool->bufs); + if(pool->map) + pb_unmap(pool->buffer); + if(pool->buffer) + pb_reference(&pool->buffer, NULL); + FREE(pool); + return NULL; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c new file mode 100644 index 0000000000..bd84b622b6 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -0,0 +1,590 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. + * + * Permission is hereby granted, FREE of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * S-lab pool implementation.
+ *
+ * @sa http://en.wikipedia.org/wiki/Slab_allocation
+ *
+ * @author Thomas Hellstrom
+ * @author Jose Fonseca
+ */
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "os/os_thread.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+struct pb_slab;
+
+
+/**
+ * Buffer in a slab.
+ *
+ * Sub-allocation of a contiguous buffer.
+ */
+struct pb_slab_buffer
+{
+   struct pb_buffer base;
+
+   struct pb_slab *slab;
+
+   struct list_head head;
+
+   unsigned mapCount;
+
+   /** Offset relative to the start of the slab buffer. */
+   pb_size start;
+
+   /** Used when validating, to signal that all mappings are finished */
+   /* TODO: Actually validation does not reach this stage yet */
+   pipe_condvar event;
+};
+
+
+/**
+ * Slab -- a contiguous piece of memory.
+ */
+struct pb_slab
+{
+   struct list_head head;
+   struct list_head freeBuffers;
+   pb_size numBuffers;
+   pb_size numFree;
+
+   struct pb_slab_buffer *buffers;
+   struct pb_slab_manager *mgr;
+
+   /** Buffer from the provider */
+   struct pb_buffer *bo;
+
+   void *virtual;
+};
+
+
+/**
+ * It adds/removes slabs as needed in order to satisfy the allocation and
+ * destruction of individual buffers.
+ */
+struct pb_slab_manager
+{
+   struct pb_manager base;
+
+   /** From where we get our buffers */
+   struct pb_manager *provider;
+
+   /** Size of the buffers we hand on downstream */
+   pb_size bufSize;
+
+   /** Size of the buffers we request upstream */
+   pb_size slabSize;
+
+   /**
+    * Alignment, usage to be used to allocate the slab buffers.
+    *
+    * We can only provide buffers which are consistent (in alignment, usage)
+    * with this description.
+    */
+   struct pb_desc desc;
+
+   /**
+    * Partial slabs
+    *
+    * Full slabs are not stored in any list. Empty slabs are destroyed
+    * immediately.
+    */
+   struct list_head slabs;
+
+   pipe_mutex mutex;
+};
+
+
+/**
+ * Wrapper around several slab managers, therefore capable of handling
+ * buffers of multiple sizes.
+ *
+ * This buffer manager just dispatches buffer allocations to the appropriate
+ * slab manager, according to the requested buffer size, or bypasses the slab
+ * managers altogether for even larger sizes.
+ *
+ * The data of this structure remains constant after
+ * initialization and thus needs no mutex protection.
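+ *
+ * Illustration (the bucket sizes here are hypothetical): with buckets of
+ * 1024, 2048 and 4096 bytes, a 3000-byte request would be served from the
+ * 4096-byte bucket, the smallest one that fits, while a request larger
+ * than maxBufSize goes straight to the provider.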
+ */
+struct pb_slab_range_manager
+{
+   struct pb_manager base;
+
+   struct pb_manager *provider;
+
+   pb_size minBufSize;
+   pb_size maxBufSize;
+
+   /** @sa pb_slab_manager::desc */
+   struct pb_desc desc;
+
+   unsigned numBuckets;
+   pb_size *bucketSizes;
+
+   /** Array of pb_slab_manager, one for each bucket size */
+   struct pb_manager **buckets;
+};
+
+
+static INLINE struct pb_slab_buffer *
+pb_slab_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct pb_slab_buffer *)buf;
+}
+
+
+static INLINE struct pb_slab_manager *
+pb_slab_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pb_slab_manager *)mgr;
+}
+
+
+static INLINE struct pb_slab_range_manager *
+pb_slab_range_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pb_slab_range_manager *)mgr;
+}
+
+
+/**
+ * Delete a buffer from the slab delayed list and put
+ * it on the slab free list.
+ */
+static void
+pb_slab_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+   struct pb_slab *slab = buf->slab;
+   struct pb_slab_manager *mgr = slab->mgr;
+   struct list_head *list = &buf->head;
+
+   pipe_mutex_lock(mgr->mutex);
+
+   assert(!pipe_is_referenced(&buf->base.reference));
+
+   buf->mapCount = 0;
+
+   LIST_DEL(list);
+   LIST_ADDTAIL(list, &slab->freeBuffers);
+   slab->numFree++;
+
+   if (slab->head.next == &slab->head)
+      LIST_ADDTAIL(&slab->head, &mgr->slabs);
+
+   /* If the slab becomes totally empty, free it */
+   if (slab->numFree == slab->numBuffers) {
+      list = &slab->head;
+      LIST_DELINIT(list);
+      pb_reference(&slab->bo, NULL);
+      FREE(slab->buffers);
+      FREE(slab);
+   }
+
+   pipe_mutex_unlock(mgr->mutex);
+}
+
+
+static void *
+pb_slab_buffer_map(struct pb_buffer *_buf,
+                   unsigned flags,
+                   void *flush_ctx)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+
+   /* XXX: it will be necessary to remap here to propagate flush_ctx */
+
+   ++buf->mapCount;
+   return (void *) ((uint8_t *) buf->slab->virtual + buf->start);
+}
+
+
+static void
+pb_slab_buffer_unmap(struct pb_buffer *_buf)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+
+   --buf->mapCount;
+   if (buf->mapCount == 0)
+      pipe_condvar_broadcast(buf->event);
+}
+
+
+static enum pipe_error
+pb_slab_buffer_validate(struct pb_buffer *_buf,
+                        struct pb_validate *vl,
+                        unsigned flags)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+   return pb_validate(buf->slab->bo, vl, flags);
+}
+
+
+static void
+pb_slab_buffer_fence(struct pb_buffer *_buf,
+                     struct pipe_fence_handle *fence)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+   pb_fence(buf->slab->bo, fence);
+}
+
+
+static void
+pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
+                               struct pb_buffer **base_buf,
+                               pb_size *offset)
+{
+   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
+   pb_get_base_buffer(buf->slab->bo, base_buf, offset);
+   *offset += buf->start;
+}
+
+
+static const struct pb_vtbl
+pb_slab_buffer_vtbl = {
+   pb_slab_buffer_destroy,
+   pb_slab_buffer_map,
+   pb_slab_buffer_unmap,
+   pb_slab_buffer_validate,
+   pb_slab_buffer_fence,
+   pb_slab_buffer_get_base_buffer
+};
+
+
+/**
+ * Create a new slab.
+ *
+ * Called when we run out of free slabs.
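+ *
+ * The slab's virtual address is recorded once here and sub-allocated
+ * buffers simply return offsets into it (see pb_slab_buffer_map above),
+ * which is why the underlying buffer must remain pinned.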
+ */
+static enum pipe_error
+pb_slab_create(struct pb_slab_manager *mgr)
+{
+   struct pb_slab *slab;
+   struct pb_slab_buffer *buf;
+   unsigned numBuffers;
+   unsigned i;
+   enum pipe_error ret;
+
+   slab = CALLOC_STRUCT(pb_slab);
+   if (!slab)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc);
+   if(!slab->bo) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out_err0;
+   }
+
+   /* Note down the slab virtual address. All mappings are accessed directly
+    * through this address so it is required that the buffer is pinned. */
+   slab->virtual = pb_map(slab->bo,
+                          PB_USAGE_CPU_READ |
+                          PB_USAGE_CPU_WRITE, NULL);
+   if(!slab->virtual) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out_err1;
+   }
+   pb_unmap(slab->bo);
+
+   numBuffers = slab->bo->size / mgr->bufSize;
+
+   slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers));
+   if (!slab->buffers) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out_err1;
+   }
+
+   LIST_INITHEAD(&slab->head);
+   LIST_INITHEAD(&slab->freeBuffers);
+   slab->numBuffers = numBuffers;
+   slab->numFree = 0;
+   slab->mgr = mgr;
+
+   buf = slab->buffers;
+   for (i = 0; i < numBuffers; ++i) {
+      pipe_reference_init(&buf->base.reference, 0);
+      buf->base.size = mgr->bufSize;
+      buf->base.alignment = 0;
+      buf->base.usage = 0;
+      buf->base.vtbl = &pb_slab_buffer_vtbl;
+      buf->slab = slab;
+      buf->start = i * mgr->bufSize;
+      buf->mapCount = 0;
+      pipe_condvar_init(buf->event);
+      LIST_ADDTAIL(&buf->head, &slab->freeBuffers);
+      slab->numFree++;
+      buf++;
+   }
+
+   /* Add this slab to the list of partial slabs */
+   LIST_ADDTAIL(&slab->head, &mgr->slabs);
+
+   return PIPE_OK;
+
+out_err1:
+   pb_reference(&slab->bo, NULL);
+out_err0:
+   FREE(slab);
+   return ret;
+}
+
+
+static struct pb_buffer *
+pb_slab_manager_create_buffer(struct pb_manager *_mgr,
+                              pb_size size,
+                              const struct pb_desc *desc)
+{
+   struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
+   struct pb_slab_buffer *buf;
+   struct pb_slab *slab;
+   struct list_head *list;
+
+   /* check size */
+   assert(size <= mgr->bufSize);
+   if(size > mgr->bufSize)
+      return NULL;
+
+   /* check if we can provide the requested alignment */
+   assert(pb_check_alignment(desc->alignment, mgr->desc.alignment));
+   if(!pb_check_alignment(desc->alignment, mgr->desc.alignment))
+      return NULL;
+   assert(pb_check_alignment(desc->alignment, mgr->bufSize));
+   if(!pb_check_alignment(desc->alignment, mgr->bufSize))
+      return NULL;
+
+   assert(pb_check_usage(desc->usage, mgr->desc.usage));
+   if(!pb_check_usage(desc->usage, mgr->desc.usage))
+      return NULL;
+
+   pipe_mutex_lock(mgr->mutex);
+
+   /* Create a new slab, if we run out of partial slabs */
+   if (mgr->slabs.next == &mgr->slabs) {
+      (void) pb_slab_create(mgr);
+      if (mgr->slabs.next == &mgr->slabs) {
+         pipe_mutex_unlock(mgr->mutex);
+         return NULL;
+      }
+   }
+
+   /* Allocate the buffer from a partial (or just created) slab */
+   list = mgr->slabs.next;
+   slab = LIST_ENTRY(struct pb_slab, list, head);
+
+   /* If totally full remove from the partial slab list */
+   if (--slab->numFree == 0)
+      LIST_DELINIT(list);
+
+   list = slab->freeBuffers.next;
+   LIST_DELINIT(list);
+
+   pipe_mutex_unlock(mgr->mutex);
+   buf = LIST_ENTRY(struct pb_slab_buffer, list, head);
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.alignment = desc->alignment;
+   buf->base.usage = desc->usage;
+
+   return &buf->base;
+}
+
+
+static void
+pb_slab_manager_flush(struct pb_manager *_mgr)
+{
+   struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
+
+   assert(mgr->provider->flush);
+   if(mgr->provider->flush)
mgr->provider->flush(mgr->provider); +} + + +static void +pb_slab_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + + /* TODO: cleanup all allocated buffers */ + FREE(mgr); +} + + +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + pb_size bufSize, + pb_size slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr; + + mgr = CALLOC_STRUCT(pb_slab_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_slab_manager_destroy; + mgr->base.create_buffer = pb_slab_manager_create_buffer; + mgr->base.flush = pb_slab_manager_flush; + + mgr->provider = provider; + mgr->bufSize = bufSize; + mgr->slabSize = slabSize; + mgr->desc = *desc; + + LIST_INITHEAD(&mgr->slabs); + + pipe_mutex_init(mgr->mutex); + + return &mgr->base; +} + + +static struct pb_buffer * +pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + pb_size bufSize; + pb_size reqSize = size; + unsigned i; + + if(desc->alignment > reqSize) + reqSize = desc->alignment; + + bufSize = mgr->minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + if(bufSize >= reqSize) + return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); + bufSize *= 2; + } + + /* Fall back to allocate a buffer object directly from the provider. */ + return mgr->provider->create_buffer(mgr->provider, size, desc); +} + + +static void +pb_slab_range_manager_flush(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + + /* Individual slabs don't hold any temporary buffers so no need to call them */ + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + +static void +pb_slab_range_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + unsigned i; + + for (i = 0; i < mgr->numBuckets; ++i) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); + FREE(mgr->bucketSizes); + FREE(mgr); +} + + +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + pb_size minBufSize, + pb_size maxBufSize, + pb_size slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr; + pb_size bufSize; + unsigned i; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_slab_range_manager); + if (!mgr) + goto out_err0; + + mgr->base.destroy = pb_slab_range_manager_destroy; + mgr->base.create_buffer = pb_slab_range_manager_create_buffer; + mgr->base.flush = pb_slab_range_manager_flush; + + mgr->provider = provider; + mgr->minBufSize = minBufSize; + mgr->maxBufSize = maxBufSize; + + mgr->numBuckets = 1; + bufSize = minBufSize; + while(bufSize < maxBufSize) { + bufSize *= 2; + ++mgr->numBuckets; + } + + mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets)); + if (!mgr->buckets) + goto out_err1; + + bufSize = minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc); + if(!mgr->buckets[i]) + goto out_err2; + bufSize *= 2; + } + + return &mgr->base; + +out_err2: + for (i = 0; i < mgr->numBuckets; ++i) + if(mgr->buckets[i]) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); +out_err1: + FREE(mgr); +out_err0: + return NULL; +} diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.c b/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.c new file mode 100644 
index 0000000000..b585422460 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.c @@ -0,0 +1,192 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_debug.h" + +#include "pb_buffer.h" +#include "pb_validate.h" + + +#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ + + +struct pb_validate_entry +{ + struct pb_buffer *buf; + unsigned flags; +}; + + +struct pb_validate +{ + struct pb_validate_entry *entries; + unsigned used; + unsigned size; +}; + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf, + unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + + assert(flags & PB_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PB_USAGE_GPU_READ_WRITE)); + flags &= PB_USAGE_GPU_READ_WRITE; + + /* We only need to store one reference for each buffer, so avoid storing + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. 
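+    *
+    * E.g. two back-to-back calls for the same buffer, the first with a
+    * read flag and the second with a write flag, collapse into a single
+    * entry whose flags are the OR of both.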
+ */ + if(vl->used && vl->entries[vl->used - 1].buf == buf) { + vl->entries[vl->used - 1].flags |= flags; + return PIPE_OK; + } + + /* Grow the table */ + if(vl->used == vl->size) { + unsigned new_size; + struct pb_validate_entry *new_entries; + + new_size = vl->size * 2; + if(!new_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, + vl->size*sizeof(struct pb_validate_entry), + new_size*sizeof(struct pb_validate_entry)); + if(!new_entries) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); + + vl->size = new_size; + vl->entries = new_entries; + } + + assert(!vl->entries[vl->used].buf); + pb_reference(&vl->entries[vl->used].buf, buf); + vl->entries[vl->used].flags = flags; + ++vl->used; + + return PIPE_OK; +} + + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = callback(vl->entries[i].buf, data); + if(ret != PIPE_OK) + return ret; + } + return PIPE_OK; +} + + +enum pipe_error +pb_validate_validate(struct pb_validate *vl) +{ + unsigned i; + + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); + if(ret != PIPE_OK) { + while(i--) + pb_validate(vl->entries[i].buf, NULL, 0); + return ret; + } + } + + return PIPE_OK; +} + + +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + pb_fence(vl->entries[i].buf, fence); + pb_reference(&vl->entries[i].buf, NULL); + } + vl->used = 0; +} + + +void +pb_validate_destroy(struct pb_validate *vl) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) + pb_reference(&vl->entries[i].buf, NULL); + FREE(vl->entries); + FREE(vl); +} + + +struct pb_validate * +pb_validate_create() +{ + struct pb_validate *vl; + + vl = CALLOC_STRUCT(pb_validate); + if(!vl) + return NULL; + + vl->size = PB_VALIDATE_INITIAL_SIZE; + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); + if(!vl->entries) { + FREE(vl); + return NULL; + } + + return vl; +} + diff --git a/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.h b/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.h new file mode 100644 index 0000000000..3c93f30f20 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.h @@ -0,0 +1,97 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. + * + * @author Jose Fonseca + */ + +#ifndef PB_VALIDATE_H_ +#define PB_VALIDATE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_buffer; +struct pipe_fence_handle; + + +/** + * Buffer validation list. + * + * It holds a list of buffers to be validated and fenced when flushing. + */ +struct pb_validate; + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf, + unsigned flags); + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data); + +/** + * Validate all buffers for hardware access. + * + * Should be called right before issuing commands to the hardware. + */ +enum pipe_error +pb_validate_validate(struct pb_validate *vl); + +/** + * Fence all buffers and clear the list. + * + * Should be called right after issuing commands to the hardware. + */ +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence); + +struct pb_validate * +pb_validate_create(void); + +void +pb_validate_destroy(struct pb_validate *vl); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_VALIDATE_H_*/ diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.c b/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.c new file mode 100644 index 0000000000..7afcf1452b --- /dev/null +++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.c @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_config.h" +#include "rtasm_cpu.h" + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + +#include "util/u_debug.h" +#include "util/u_cpu_detect.h" + +DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", FALSE); + +static struct util_cpu_caps *get_cpu_caps(void) +{ + util_cpu_detect(); + return &util_cpu_caps; +} + +int rtasm_cpu_has_sse(void) +{ + return !debug_get_option_nosse() && get_cpu_caps()->has_sse; +} + +int rtasm_cpu_has_sse2(void) +{ + return !debug_get_option_nosse() && get_cpu_caps()->has_sse2; +} + + +#else + +int rtasm_cpu_has_sse(void) +{ + return 0; +} + +int rtasm_cpu_has_sse2(void) +{ + return 0; +} + +#endif diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.h b/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.h new file mode 100644 index 0000000000..ebc71634fd --- /dev/null +++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Runtime detection of CPU capabilities. + */ + +#ifndef _RTASM_CPU_H_ +#define _RTASM_CPU_H_ + + +int rtasm_cpu_has_sse(void); + +int rtasm_cpu_has_sse2(void); + + +#endif /* _RTASM_CPU_H_ */ diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.c b/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.c new file mode 100644 index 0000000000..50de1a992f --- /dev/null +++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * \file rtasm_execmem.c
+ * Functions for allocating executable memory.
+ *
+ * \author Keith Whitwell
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "os/os_thread.h"
+#include "util/u_memory.h"
+
+#include "rtasm_execmem.h"
+
+#include "util/u_mm.h"
+
+#define EXEC_HEAP_SIZE (4*1024*1024)
+
+pipe_static_mutex(exec_mutex);
+
+static struct mem_block *exec_heap = NULL;
+static unsigned char *exec_mem = NULL;
+
+
+static void
+init_heap(void)
+{
+   if (!exec_heap)
+      exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE );
+
+   if (!exec_mem)
+      exec_mem = (unsigned char *) user_alloc(EXEC_HEAP_SIZE);
+}
+
+
+void *
+rtasm_exec_malloc(size_t size)
+{
+   struct mem_block *block = NULL;
+   void *addr = NULL;
+
+   pipe_mutex_lock(exec_mutex);
+
+   init_heap();
+
+   if (exec_heap) {
+      size = (size + 31) & ~31;  /* next multiple of 32 bytes */
+      block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
+   }
+
+   if (block)
+      addr = exec_mem + block->ofs;
+   else
+      debug_printf("rtasm_exec_malloc failed\n");
+
+   pipe_mutex_unlock(exec_mutex);
+
+   return addr;
+}
+
+
+void
+rtasm_exec_free(void *addr)
+{
+   pipe_mutex_lock(exec_mutex);
+
+   if (exec_heap) {
+      struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
+
+      if (block)
+         u_mmFreeMem(block);
+   }
+
+   pipe_mutex_unlock(exec_mutex);
+}
+
diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.h b/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.h
new file mode 100644
index 0000000000..5028e63cca
--- /dev/null
+++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file rtasm_execmem.h
+ * Functions for allocating executable memory.
+ *
+ * \author Keith Whitwell
+ */
+
+#ifndef _RTASM_EXECMEM_H_
+#define _RTASM_EXECMEM_H_
+
+#include "pipe/p_compiler.h"
+
+
+extern void *
+rtasm_exec_malloc( size_t size );
+
+
+extern void
+rtasm_exec_free( void *addr );
+
+
+#endif
diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.c b/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.c
new file mode 100644
index 0000000000..24ff820a4e
--- /dev/null
+++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -0,0 +1,2232 @@
+/**************************************************************************
+ *
+ * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +#include "pipe/p_config.h" +#include "util/u_cpu_detect.h" + +#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__)) + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "util/u_pointer.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + + +#define DUMP_SSE 0 + + +void x86_print_reg( struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: debug_printf( "EAX" ); break; + case reg_CX: debug_printf( "ECX" ); break; + case reg_DX: debug_printf( "EDX" ); break; + case reg_BX: debug_printf( "EBX" ); break; + case reg_SP: debug_printf( "ESP" ); break; + case reg_BP: debug_printf( "EBP" ); break; + case reg_SI: debug_printf( "ESI" ); break; + case reg_DI: debug_printf( "EDI" ); break; + } + break; + case file_MMX: + debug_printf( "MMX%u", reg.idx ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + debug_printf( "fp%u", reg.idx ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + +#if DUMP_SSE + +#define DUMP_START() debug_printf( "\n" ) +#define DUMP_END() debug_printf( "\n" ) + +#define DUMP() do { \ + const char *foo = __FUNCTION__; \ + while (*foo && *foo != '_') \ + foo++; \ + if (*foo) \ + foo++; \ + debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ +} while (0) + +#define DUMP_I( I ) do { \ + DUMP(); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_R( R0 ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ +} while( 0 ) + +#define DUMP_RR( R0, R1 ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ + debug_printf( ", " ); \ + x86_print_reg( R1 ); \ +} while( 0 ) + +#define DUMP_RI( R0, I ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#define DUMP_RRI( R0, R1, I ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ + debug_printf( ", " ); \ + x86_print_reg( R1 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( ) +#define DUMP_I( I ) +#define DUMP_R( R0 ) +#define DUMP_RR( R0, R1 ) +#define DUMP_RI( R0, I ) +#define DUMP_RRI( R0, R1, I ) + +#endif + + +static void do_realloc( struct x86_function *p ) +{ + if (p->store == p->error_overflow) { + p->csr = p->store; + } + else if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + + if (p->store) { + memcpy(p->store, tmp, used); + p->csr = p->store + used; + } + else { + p->csr = p->store; + } + + rtasm_exec_free(tmp); + } + + if (p->store == NULL) { + p->store = p->csr = p->error_overflow; + p->size = sizeof(p->error_overflow); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > (int) p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int 
*)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + * + * This is the "/r" field in the x86 manuals... + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + /* TODO: support extended x86-64 registers */ + assert(reg.idx < 8); + assert(regmem.idx < 8); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP && + regmem.mod != mod_REG) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, (char) regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + +/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. + */ +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. 
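+ *
+ * E.g. x86_mov() below passes 0x8b (reg <- r/m) and 0x89 (r/m <- reg):
+ * a register destination picks 0x8b, while a memory destination picks
+ * 0x89 and swaps the operands in the modrm byte.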
+ */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0 && reg.idx != reg_BP) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +int x86_get_label( struct x86_function *p ) +{ + return p->csr - p->store; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x64_rexw(struct x86_function *p) +{ + if(x86_target(p) != X86_32) + emit_1ub(p, 0x48); +} + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + int label ) +{ + int offset = label - (x86_get_label(p) + 2); + DUMP_I(cc); + + if (offset < 0) { + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } + } + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + DUMP_I(cc); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +int x86_jmp_forward( struct x86_function *p) +{ + DUMP(); + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +int x86_call_forward( struct x86_function *p) +{ + DUMP(); + + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + int fixup ) +{ + *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, int label) +{ + DUMP_I( label ); + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + DUMP_R( reg ); + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 2, reg); +} + + +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + if(dst.mod == mod_REG) + x86_mov_reg_imm(p, dst, imm); + else + { + emit_1ub(p, 0xc7); + emit_modrm_noreg(p, 0, dst); + emit_1i(p, imm); + } +} + +void x86_mov16_imm( 
struct x86_function *p, struct x86_reg dst, uint16_t imm ) +{ + DUMP_RI( dst, imm ); + emit_1ub(p, 0x66); + if(dst.mod == mod_REG) + { + emit_1ub(p, 0xb8 + dst.idx); + emit_2ub(p, imm & 0xff, imm >> 8); + } + else + { + emit_1ub(p, 0xc7); + emit_modrm_noreg(p, 0, dst); + emit_2ub(p, imm & 0xff, imm >> 8); + } +} + +void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) +{ + DUMP_RI( dst, imm ); + if(dst.mod == mod_REG) + { + emit_1ub(p, 0xb0 + dst.idx); + emit_1ub(p, imm); + } + else + { + emit_1ub(p, 0xc6); + emit_modrm_noreg(p, 0, dst); + emit_1ub(p, imm); + } +} + +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) +{ + assert(dst.file == file_REG32); + assert(dst.mod == mod_REG); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); +} + + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + if (reg.mod == mod_REG) + emit_1ub(p, 0x50 + reg.idx); + else + { + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 6, reg); + } + + + p->stack_offset += sizeof(void*); +} + +void x86_push_imm32( struct x86_function *p, + int imm32 ) +{ + DUMP_I( imm32 ); + emit_1ub(p, 0x68); + emit_1i(p, imm32); + + p->stack_offset += sizeof(void*); +} + + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= sizeof(void*); +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + if(x86_target(p) == X86_32 && reg.mod == mod_REG) + { + emit_1ub(p, 0x40 + reg.idx); + return; + } + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 0, reg); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + if(x86_target(p) == X86_32 && reg.mod == mod_REG) + { + emit_1ub(p, 0x48 + reg.idx); + return; + } + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 1, reg); +} + +void x86_ret( struct x86_function *p ) +{ + DUMP(); + assert(p->stack_offset == 0); + emit_1ub(p, 0xc3); +} + +void x86_retw( struct x86_function *p, unsigned short imm ) +{ + DUMP(); + emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); +} + +void x86_sahf( struct x86_function *p ) +{ + DUMP(); + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + /* special hack for reading arguments until we support x86-64 registers everywhere */ + if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) + { + uint8_t rex 
= 0x40; + if(dst.idx >= 8) + { + rex |= 4; + dst.idx -= 8; + } + if(src.idx >= 8) + { + rex |= 1; + src.idx -= 8; + } + emit_1ub(p, rex); + } + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_mov16( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 0x66); + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_mov8( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x8a, 0x88, dst, src ); +} + +void x64_mov64( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + uint8_t rex = 0x48; + DUMP_RR( dst, src ); + assert(x86_target(p) != X86_32); + + /* special hack for reading arguments until we support x86-64 registers everywhere */ + if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) + { + if(dst.idx >= 8) + { + rex |= 4; + dst.idx -= 8; + } + if(src.idx >= 8) + { + rex |= 1; + src.idx -= 8; + } + } + emit_1ub(p, rex); + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, 0x0f, 0xb6); + emit_modrm(p, dst, src); +} + +void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, 0x0f, 0xb7); + emit_modrm(p, dst, src); +} + +void x86_cmovcc( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + enum x86_cc cc) +{ + DUMP_RRI( dst, src, cc ); + emit_2ub( p, 0x0f, 0x40 + cc ); + emit_modrm( p, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +/* Calculate EAX * src, results in EDX:EAX. 
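+ * (This is the one-operand unsigned MUL, opcode 0xf7 with /4: the 64-bit
+ * product's low half is left in EAX and the high half in EDX.)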
+ */ +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + DUMP_R( src ); + emit_1ub(p, 0xf7); + emit_modrm_noreg(p, 4, src ); +} + + +void x86_imul( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0xAF); + emit_modrm(p, dst, src); +} + + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + +void x86_div( struct x86_function *p, + struct x86_reg src ) +{ + assert(src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); +} + +void x86_bswap( struct x86_function *p, struct x86_reg reg ) +{ + DUMP_R(reg); + assert(reg.file == file_REG32); + assert(reg.mod == mod_REG); + emit_2ub(p, 0x0f, 0xc8 + reg.idx); +} + +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) +{ + DUMP_RI(reg, imm); + if(imm == 1) + { + emit_1ub(p, 0xd1); + emit_modrm_noreg(p, 5, reg); + } + else + { + emit_1ub(p, 0xc1); + emit_modrm_noreg(p, 5, reg); + emit_1ub(p, imm); + } +} + +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) +{ + DUMP_RI(reg, imm); + if(imm == 1) + { + emit_1ub(p, 0xd1); + emit_modrm_noreg(p, 7, reg); + } + else + { + emit_1ub(p, 0xc1); + emit_modrm_noreg(p, 7, reg); + emit_1ub(p, imm); + } +} + +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) +{ + DUMP_RI(reg, imm); + if(imm == 1) + { + emit_1ub(p, 0xd1); + emit_modrm_noreg(p, 4, reg); + } + else + { + emit_1ub(p, 0xc1); + emit_modrm_noreg(p, 4, reg); + emit_1ub(p, imm); + } +} + + +/*********************************************************************** + * SSE instructions + */ + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + +void sse_movntps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + + + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + 
assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + 
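+/*
+ * A minimal usage sketch for the emitters above (assuming the
+ * x86_init_func()/x86_get_func()/x86_release_func() helpers declared in
+ * rtasm_x86sse.h): build and call a function that zeroes xmm0 and returns.
+ *
+ *    struct x86_function f;
+ *    void (*fn)(void);
+ *
+ *    x86_init_func(&f);
+ *    sse_xorps(&f, x86_make_reg(file_XMM, 0), x86_make_reg(file_XMM, 0));
+ *    x86_ret(&f);
+ *    fn = (void (*)(void)) x86_get_func(&f);
+ *    fn();
+ *    x86_release_func(&f);
+ */
+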
+void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + +void sse2_cvtdq2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5b); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + enum sse_cc cc) +{ + DUMP_RRI( dst, src, cc ); + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dst, src); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dst, src); +} + +void sse_movmskps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x50); + emit_modrm(p, dst, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0x66, 0x0f); + if(dst.mod == mod_REG && dst.file == file_REG32) + { + emit_1ub(p, 0x7e); + emit_modrm(p, src, dst); + } + else + { + emit_op_modrm(p, 0x6e, 0x7e, dst, src); + } +} + +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + switch (dst.mod) { + case mod_REG: + emit_3ub(p, 0xf3, 0x0f, 0x7e); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_3ub(p, 0x66, 0x0f, 0xd6); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0xf3, 0x0f); + emit_op_modrm(p, 0x6f, 0x7f, dst, src); +} + +void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0x66, 0x0f); + emit_op_modrm(p, 0x6f, 0x7f, dst, src); +} + +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0xf2, 0x0f); + emit_op_modrm(p, 0x10, 0x11, dst, src); +} + +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0x66, 0x0f); + emit_op_modrm(p, 0x10, 0x11, dst, src); +} + +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_2ub(p, 0x66, 0x0f); + emit_op_modrm(p, 0x28, 0x29, dst, src); +} + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dst, + struct 
x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse2_pshuflw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_3ub(p, 0xf2, X86_TWOB, 0x70); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse2_pshufhw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_3ub(p, 0xf3, X86_TWOB, 0x70); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_cvtsd2ss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xf2, 0x0f, 0x5a); + emit_modrm( p, dst, src ); +} + +void sse2_cvtpd2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, 0x0f, 0x5a); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_punpcklbw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x60); + emit_modrm( p, dst, src ); +} + +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, 0x0f, 0x61); + emit_modrm( p, dst, src ); +} + +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, 0x0f, 0x62); + emit_modrm( p, dst, src ); +} + +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, 0x0f, 0x6c); + emit_modrm( p, dst, src ); +} + +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x71); + emit_modrm_noreg(p, 6, dst); + emit_1ub(p, imm); +} + +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x72); + emit_modrm_noreg(p, 6, dst); + emit_1ub(p, imm); +} + +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x73); + emit_modrm_noreg(p, 6, dst); + emit_1ub(p, imm); +} + +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x71); + emit_modrm_noreg(p, 2, dst); + emit_1ub(p, imm); +} + +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 
0x66, 0x0f, 0x72); + emit_modrm_noreg(p, 2, dst); + emit_1ub(p, imm); +} + +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x73); + emit_modrm_noreg(p, 2, dst); + emit_1ub(p, imm); +} + +void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x71); + emit_modrm_noreg(p, 4, dst); + emit_1ub(p, imm); +} + +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) +{ + DUMP_RI(dst, imm); + emit_3ub(p, 0x66, 0x0f, 0x72); + emit_modrm_noreg(p, 4, dst); + emit_1ub(p, imm); +} + +void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR(dst, src); + emit_3ub(p, 0x66, 0x0f, 0xeb); + emit_modrm(p, dst, src); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +/*********************************************************************** + * x87 instructions + */ +static void note_x87_pop( struct x86_function *p ) +{ + p->x87_stack--; + assert(p->x87_stack >= 0); +} + +static void note_x87_push( struct x86_function *p ) +{ + p->x87_stack++; + assert(p->x87_stack <= 7); +} + +void x87_assert_stack_empty( struct x86_function *p ) +{ + assert (p->x87_stack == 0); +} + + +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); + note_x87_pop(p); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); + note_x87_push(p); +} + +void x87_fldz( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xee); + note_x87_push(p); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe8); + note_x87_push(p); +} + +void x87_fldl2e( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xea); + note_x87_push(p); +} + +void x87_fldln2( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xed); + note_x87_push(p); +} + +void x87_fwait( struct x86_function *p ) +{ + DUMP(); + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + +void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc0+arg.idx); +} + +void x87_fcmove( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc8+arg.idx); +} + +void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xd0+arg.idx); +} + +void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc0+arg.idx); 
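+   /* Note that FCMOVcc, like everything else in this file, is emitted
+    * without any runtime capability check; it needs a P6-class FPU
+    * (the CPUID CMOV flag together with an on-chip FPU).
+    */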
+}
+
+void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdb, 0xc8+arg.idx);
+}
+
+void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdb, 0xd0+arg.idx);
+}
+
+
+
+/* Shared emitter for the two-operand x87 arithmetic instructions:
+ * dst0ub0/dst0ub1 encode the "st(0) op= st(i)" form, arg0ub0/arg0ub1
+ * the "st(i) op= st(0)" form, and argmem_noreg is the 0xd8 /r opcode
+ * extension used when arg is a 32-bit memory operand.
+ */
+static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
+                          unsigned char dst0ub0,
+                          unsigned char dst0ub1,
+                          unsigned char arg0ub0,
+                          unsigned char arg0ub1,
+                          unsigned char argmem_noreg)
+{
+   assert(dst.file == file_x87);
+
+   if (arg.file == file_x87) {
+      if (dst.idx == 0)
+         emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
+      else if (arg.idx == 0)
+         emit_2ub(p, arg0ub0, arg0ub1+dst.idx);
+      else
+         assert(0);
+   }
+   else if (dst.idx == 0) {
+      assert(arg.file == file_REG32);
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, argmem_noreg, arg);
+   }
+   else
+      assert(0);
+}
+
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xc8,
+                0xdc, 0xc8,
+                1);             /* FMUL m32fp is d8 /1 */
+}
+
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xe0,
+                0xdc, 0xe8,
+                4);
+}
+
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xe8,
+                0xdc, 0xe0,
+                5);
+}
+
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xc0,
+                0xdc, 0xc0,
+                0);
+}
+
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xf0,
+                0xdc, 0xf8,
+                6);
+}
+
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   x87_arith_op(p, dst, src,
+                0xd8, 0xf8,
+                0xdc, 0xf0,
+                7);
+}
+
+void x87_fmulp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xc8+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_fsubp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xe8+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xe0+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_faddp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xc0+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_fdivp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xf8+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+   emit_2ub(p, 0xde, 0xf0+dst.idx);
+   note_x87_pop(p);
+}
+
+void x87_ftst( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xe4);
+}
+
+void x87_fucom( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdd, 0xe0+arg.idx);
+}
+
+void x87_fucomp( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdd, 0xe8+arg.idx);
+   note_x87_pop(p);
+}
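+
+/* The fucom family differs only in stack effect: x87_fucom() leaves the
+ * stack alone, x87_fucomp() pops once and x87_fucompp() (below) pops
+ * twice; the note_x87_pop() calls keep p->x87_stack in sync.  The result
+ * lands in the FPU status word, so a typical sequence is, for instance:
+ *
+ *    x87_fucomp(p, x86_make_reg(file_x87, 1));
+ *    x87_fnstsw(p, x86_make_reg(file_REG32, reg_AX));
+ *    x86_sahf(p);
+ */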
+
+void x87_fucompp( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xda, 0xe9);
+   note_x87_pop(p);             /* pops twice */
+   note_x87_pop(p);
+}
+
+void x87_fxch( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xd9, 0xc8+arg.idx);
+}
+
+void x87_fabs( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xe1);
+}
+
+void x87_fchs( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xe0);
+}
+
+void x87_fcos( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xff);
+}
+
+
+void x87_fprndint( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xfc);
+}
+
+void x87_fscale( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xfd);
+}
+
+void x87_fsin( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xfe);
+}
+
+void x87_fsincos( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xfb);
+}
+
+void x87_fsqrt( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xfa);
+}
+
+void x87_fxtract( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xf4);
+}
+
+/* st0 = (2^st0)-1
+ *
+ * Restrictions: -1.0 <= st0 <= 1.0
+ */
+void x87_f2xm1( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xf0);
+}
+
+/* st1 = st1 * log2(st0);
+ * pop_stack;
+ */
+void x87_fyl2x( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xf1);
+   note_x87_pop(p);
+}
+
+/* st1 = st1 * log2(st0 + 1.0);
+ * pop_stack;
+ *
+ * A fast operation, with restrictions: -.29 < st0 < .29
+ */
+void x87_fyl2xp1( struct x86_function *p )
+{
+   DUMP();
+   emit_2ub(p, 0xd9, 0xf9);
+   note_x87_pop(p);
+}
+
+
+void x87_fld( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   if (arg.file == file_x87)
+      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
+   else {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 0, arg);
+   }
+   note_x87_push(p);
+}
+
+void x87_fst( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   if (dst.file == file_x87)
+      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
+   else {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 2, dst);
+   }
+}
+
+void x87_fstp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   if (dst.file == file_x87)
+      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
+   else {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 3, dst);
+   }
+   note_x87_pop(p);
+}
+
+void x87_fpop( struct x86_function *p )
+{
+   x87_fstp( p, x86_make_reg( file_x87, 0 ));
+}
+
+
+void x87_fcom( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   if (dst.file == file_x87)
+      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
+   else {
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, 2, dst);
+   }
+}
+
+
+void x87_fcomp( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   if (dst.file == file_x87)
+      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
+   else {
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, 3, dst);
+   }
+   note_x87_pop(p);
+}
+
+void x87_fcomi( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   emit_2ub(p, 0xdb, 0xf0+arg.idx);
+}
+
+void x87_fcomip( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   emit_2ub(p, 0xdf, 0xf0+arg.idx);     /* df, not db: FCOMIP is the popping form */
+   note_x87_pop(p);
+}
+
+
+void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_REG32);
+
+   if (dst.idx == reg_AX &&
+       dst.mod == mod_REG)
+      emit_2ub(p, 0xdf, 0xe0);
+   else {
+      emit_1ub(p, 0xdd);
+      emit_modrm_noreg(p, 7, dst);
+   }
+}
+
+
+void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
+{
+   DUMP_R( dst );
+   assert(dst.file == file_REG32);
+
+   emit_1ub(p, 0x9b);           /* WAIT -- needed? */
+   emit_1ub(p, 0xd9);
+   emit_modrm_noreg(p, 7, dst);
+}
+
+
+
+
+/***********************************************************************
+ * MMX instructions
+ */
+
+void mmx_emms( struct x86_function *p )
+{
+   DUMP();
+   assert(p->need_emms);
+   emit_2ub(p, 0x0f, 0x77);
+   p->need_emms = 0;
+}
+
+void mmx_packssdw( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   assert(dst.file == file_MMX &&
+          (src.file == file_MMX || src.mod != mod_REG));
+
+   p->need_emms = 1;
+
+   emit_2ub(p, X86_TWOB, 0x6b);
+   emit_modrm( p, dst, src );
+}
+
+void mmx_packuswb( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   assert(dst.file == file_MMX &&
+          (src.file == file_MMX || src.mod != mod_REG));
+
+   p->need_emms = 1;
+
+   emit_2ub(p, X86_TWOB, 0x67);
+   emit_modrm( p, dst, src );
+}
+
+void mmx_movd( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   p->need_emms = 1;
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
+}
+
+void mmx_movq( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   p->need_emms = 1;
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
+}
+
+
+/***********************************************************************
+ * Helper functions
+ */
+
+
+void x86_cdecl_caller_push_regs( struct x86_function *p )
+{
+   x86_push(p, x86_make_reg(file_REG32, reg_AX));
+   x86_push(p, x86_make_reg(file_REG32, reg_CX));
+   x86_push(p, x86_make_reg(file_REG32, reg_DX));
+}
+
+void x86_cdecl_caller_pop_regs( struct x86_function *p )
+{
+   x86_pop(p, x86_make_reg(file_REG32, reg_DX));
+   x86_pop(p, x86_make_reg(file_REG32, reg_CX));
+   x86_pop(p, x86_make_reg(file_REG32, reg_AX));
+}
+
+
+struct x86_reg x86_fn_arg( struct x86_function *p,
+                           unsigned arg )
+{
+   switch(x86_target(p))
+   {
+   case X86_64_WIN64_ABI:
+      /* Microsoft uses a different calling convention than the rest of the world */
+      switch(arg)
+      {
+      case 1:
+         return x86_make_reg(file_REG32, reg_CX);
+      case 2:
+         return x86_make_reg(file_REG32, reg_DX);
+      case 3:
+         return x86_make_reg(file_REG32, reg_R8);
+      case 4:
+         return x86_make_reg(file_REG32, reg_R9);
+      default:
+         /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
+         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+                              p->stack_offset + arg * 8);
+      }
+   case X86_64_STD_ABI:
+      switch(arg)
+      {
+      case 1:
+         return x86_make_reg(file_REG32, reg_DI);
+      case 2:
+         return x86_make_reg(file_REG32, reg_SI);
+      case 3:
+         return x86_make_reg(file_REG32, reg_DX);
+      case 4:
+         return x86_make_reg(file_REG32, reg_CX);
+      case 5:
+         return x86_make_reg(file_REG32, reg_R8);
+      case 6:
+         return x86_make_reg(file_REG32, reg_R9);
+      default:
+         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+                              p->stack_offset + (arg - 6) * 8);  /* args 7+ sit just above the 8-byte return address */
+      }
+   case X86_32:
+      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+                           p->stack_offset + arg * 4);  /* args start just above the 4-byte return address 
*/ + default: + assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); + return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ + } +} + +static void x86_init_func_common( struct x86_function *p ) +{ + util_cpu_detect(); + p->caps = 0; + if(util_cpu_caps.has_mmx) + p->caps |= X86_MMX; + if(util_cpu_caps.has_mmx2) + p->caps |= X86_MMX2; + if(util_cpu_caps.has_sse) + p->caps |= X86_SSE; + if(util_cpu_caps.has_sse2) + p->caps |= X86_SSE2; + if(util_cpu_caps.has_sse3) + p->caps |= X86_SSE3; + if(util_cpu_caps.has_sse4_1) + p->caps |= X86_SSE4_1; + p->csr = p->store; + DUMP_START(); +} + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + x86_init_func_common(p); +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + if (p->store == NULL) { + p->store = p->error_overflow; + } + x86_init_func_common(p); +} + +void x86_release_func( struct x86_function *p ) +{ + if (p->store && p->store != p->error_overflow) + rtasm_exec_free(p->store); + + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +static INLINE x86_func +voidptr_to_x86_func(void *v) +{ + union { + void *v; + x86_func f; + } u; + assert(sizeof(u.v) == sizeof(u.f)); + u.v = v; + return u.f; +} + + +x86_func x86_get_func( struct x86_function *p ) +{ + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return voidptr_to_x86_func(NULL); + else + return voidptr_to_x86_func(p->store); +} + +#else + +void x86sse_dummy( void ); + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.h b/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.h new file mode 100644 index 0000000000..67c9bdd993 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -0,0 +1,416 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. 
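+ * Callers can consult x86_target_caps(), filled in from
+ * util_cpu_detect(), before emitting, as in this sketch:
+ *
+ *    if (x86_target_caps(p) & X86_SSE2)
+ *       sse2_pshufd(p, dst, src, SHUF(0, 0, 0, 0));
+ *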
There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:2; + unsigned idx:4; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ +}; + +#define X86_MMX 1 +#define X86_MMX2 2 +#define X86_SSE 4 +#define X86_SSE2 8 +#define X86_SSE3 0x10 +#define X86_SSE4_1 0x20 + +struct x86_function { + unsigned caps; + unsigned size; + unsigned char *store; + unsigned char *csr; + + unsigned stack_offset:16; + unsigned need_emms:8; + int x87_stack:8; + + unsigned char error_overflow[4]; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI, + reg_R8, + reg_R9, + reg_R10, + reg_R11, + reg_R12, + reg_R13, + reg_R14, + reg_R15 +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + + +/** generic pointer to function */ +typedef void (*x86_func)(void); + + +/* Begin/end/retrieve function creation: + */ + +enum x86_target +{ + X86_32, + X86_64_STD_ABI, + X86_64_WIN64_ABI +}; + +/* make this read a member of x86_function if target != host is desired */ +static INLINE enum x86_target x86_target( struct x86_function* p ) +{ +#ifdef PIPE_ARCH_X86 + return X86_32; +#elif defined(_WIN64) + return X86_64_WIN64_ABI; +#elif defined(PIPE_ARCH_X86_64) + return X86_64_STD_ABI; +#endif +} + +static INLINE unsigned x86_target_caps( struct x86_function* p ) +{ + return p->caps; +} + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +x86_func x86_get_func( struct x86_function *p ); + +/* Debugging: + */ +void x86_print_reg( struct x86_reg reg ); + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +int x86_get_label( struct x86_function *p ); + +void x64_rexw(struct x86_function *p); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + int label ); + +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +int x86_jmp_forward( struct x86_function *p); + +int x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + int fixup ); + +void x86_jmp( struct x86_function *p, int label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void 
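+/* A minimal end-to-end sketch of driving this module (the buffer size
+ * and the constant are arbitrary):
+ *
+ *    struct x86_function f;
+ *    x86_func fn;
+ *
+ *    x86_init_func_size(&f, 1024);
+ *    x86_mov_reg_imm(&f, x86_make_reg(file_REG32, reg_AX), 42);
+ *    x86_ret(&f);
+ *    fn = x86_get_func(&f);          (NULL if the store overflowed)
+ *    ... call fn(), then ...
+ *    x86_release_func(&f);
+ */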
x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); +void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); +void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); + +void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); +void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); +void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); + +void 
sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); +void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); + +void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); +void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + +void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + enum sse_cc cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); +void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, 
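+                 /* enum x86_cc above follows the hardware condition-code
+                  * numbering (cc_O = 0 ... cc_NE = 5), so cc_E is
+                  * condition 4, the one used by JE/CMOVE; cc_Z/cc_NZ are
+                  * plain aliases of cc_E/cc_NE.
+                  */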
enum x86_cc cc ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); +void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); +void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_push_imm32( struct x86_function *p, int imm ); +void x86_ret( struct x86_function *p ); +void x86_retw( struct x86_function *p, unsigned short imm ); +void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); +void x86_div( struct x86_function *p, struct x86_reg src ); +void x86_bswap( struct x86_function *p, struct x86_reg src ); +void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); +void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); +void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); + +void x86_cdecl_caller_push_regs( struct x86_function *p ); +void x86_cdecl_caller_pop_regs( struct x86_function *p ); + +void x87_assert_stack_empty( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcmovb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmove( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmovne( struct x86_function *p, struct x86_reg src ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomi( struct x86_function *p, struct x86_reg dst ); +void x87_fcomip( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct 
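+/* For the two-register x87 arithmetic helpers one operand must be st(0):
+ * with st2 = x86_make_reg(file_x87, 2), x87_fadd(p, st0, st2) emits
+ * d8 c2, x87_fadd(p, st2, st0) emits dc c2, and x87_faddp(p, st2) emits
+ * de c2 and pops; any other register pairing trips the assert in
+ * x87_arith_op().
+ */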
x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg ); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fpop( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_ftst( struct x86_function *p ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retrieve a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explicit + * manipulation of ESP by other instructions. + */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif diff --git a/drivers/video/Gallium/auxiliary/translate/translate.c b/drivers/video/Gallium/auxiliary/translate/translate.c new file mode 100644 index 0000000000..73287b667d --- /dev/null +++ b/drivers/video/Gallium/auxiliary/translate/translate.c @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_config.h" +#include "pipe/p_state.h" +#include "translate.h" + +struct translate *translate_create( const struct translate_key *key ) +{ + struct translate *translate = NULL; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + translate = translate_sse2_create( key ); + if (translate) + return translate; +#else + (void)translate; +#endif + + return translate_generic_create( key ); +} + +boolean translate_is_output_format_supported(enum pipe_format format) +{ + return translate_generic_is_output_format_supported(format); +} diff --git a/drivers/video/Gallium/auxiliary/translate/translate.h b/drivers/video/Gallium/auxiliary/translate/translate.h new file mode 100644 index 0000000000..1132114de9 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/translate/translate.h @@ -0,0 +1,160 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * Vertex fetch/store/convert code. This functionality is used in two places: + * 1. Vertex fetch/convert - to grab vertex data from incoming vertex + * arrays and convert to format needed by vertex shaders. + * 2. Vertex store/emit - to convert simple float[][4] vertex attributes + * (which is the organization used throughout the draw/prim pipeline) to + * hardware-specific formats and emit into hardware vertex buffers. 
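+ *
+ * A rough usage sketch for case 1 (the local names here are
+ * illustrative only): describe one element in a translate_key, create
+ * the object, point it at the source vertices and run:
+ *
+ *    struct translate_key key;
+ *    struct translate *t;
+ *
+ *    memset(&key, 0, sizeof key);
+ *    key.output_stride = 4 * sizeof(float);
+ *    key.nr_elements = 1;
+ *    key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
+ *    key.element[0].input_format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ *    key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ *    t = translate_create(&key);
+ *    t->set_buffer(t, 0, verts, vertex_stride, max_index);
+ *    t->run(t, 0, count, 0, 0, output);
+ *    t->release(t);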
+ * + * + * Authors: + * Keith Whitwell + */ + +#ifndef _TRANSLATE_H +#define _TRANSLATE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" + +enum translate_element_type { + TRANSLATE_ELEMENT_NORMAL, + TRANSLATE_ELEMENT_INSTANCE_ID +}; + +struct translate_element +{ + enum translate_element_type type; + enum pipe_format input_format; + enum pipe_format output_format; + unsigned input_buffer:8; + unsigned input_offset:24; + unsigned instance_divisor; + unsigned output_offset; +}; + + +struct translate_key { + unsigned output_stride; + unsigned nr_elements; + struct translate_element element[PIPE_MAX_ATTRIBS + 1]; +}; + + +struct translate; + + +typedef void (PIPE_CDECL *run_elts_func)(struct translate *, + const unsigned *elts, + unsigned count, + unsigned start_instance, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts16_func)(struct translate *, + const uint16_t *elts, + unsigned count, + unsigned start_instance, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_elts8_func)(struct translate *, + const uint8_t *elts, + unsigned count, + unsigned start_instance, + unsigned instance_id, + void *output_buffer); + +typedef void (PIPE_CDECL *run_func)(struct translate *, + unsigned start, + unsigned count, + unsigned start_instance, + unsigned instance_id, + void *output_buffer); + +struct translate { + struct translate_key key; + + void (*release)( struct translate * ); + + void (*set_buffer)( struct translate *, + unsigned i, + const void *ptr, + unsigned stride, + unsigned max_index ); + + run_elts_func run_elts; + run_elts16_func run_elts16; + run_elts8_func run_elts8; + run_func run; +}; + + + +struct translate *translate_create( const struct translate_key *key ); + +boolean translate_is_output_format_supported(enum pipe_format format); + +static INLINE int translate_keysize( const struct translate_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); +} + +static INLINE int translate_key_compare( const struct translate_key *a, + const struct translate_key *b ) +{ + int keysize_a = translate_keysize(a); + int keysize_b = translate_keysize(b); + + if (keysize_a != keysize_b) { + return keysize_a - keysize_b; + } + return memcmp(a, b, keysize_a); +} + + +static INLINE void translate_key_sanitize( struct translate_key *a ) +{ + int keysize = translate_keysize(a); + char *ptr = (char *)a; + memset(ptr + keysize, 0, sizeof(*a) - keysize); +} + + +/******************************************************************************* + * Private: + */ +struct translate *translate_sse2_create( const struct translate_key *key ); + +struct translate *translate_generic_create( const struct translate_key *key ); + +boolean translate_generic_is_output_format_supported(enum pipe_format format); + +#endif diff --git a/drivers/video/Gallium/auxiliary/translate/translate_cache.c b/drivers/video/Gallium/auxiliary/translate/translate_cache.c new file mode 100644 index 0000000000..3f1ecb630f --- /dev/null +++ b/drivers/video/Gallium/auxiliary/translate/translate_cache.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_state.h" +#include "translate.h" +#include "translate_cache.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct translate_cache { + struct cso_hash *hash; +}; + +struct translate_cache * translate_cache_create( void ) +{ + struct translate_cache *cache = MALLOC_STRUCT(translate_cache); + if (cache == NULL) { + return NULL; + } + + cache->hash = cso_hash_create(); + return cache; +} + + +static INLINE void delete_translates(struct translate_cache *cache) +{ + struct cso_hash *hash = cache->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} + +void translate_cache_destroy(struct translate_cache *cache) +{ + delete_translates(cache); + cso_hash_delete(cache->hash); + FREE(cache); +} + + +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +struct translate * translate_cache_find(struct translate_cache *cache, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct translate *translate = (struct translate*) + cso_hash_find_data_from_template(cache->hash, + hash_key, + key, sizeof(*key)); + + if (!translate) { + /* create/insert */ + translate = translate_create(key); + cso_hash_insert(cache->hash, hash_key, translate); + } + + return translate; +} diff --git a/drivers/video/Gallium/auxiliary/translate/translate_cache.h b/drivers/video/Gallium/auxiliary/translate/translate_cache.h new file mode 100644 index 0000000000..7dba871e57 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/translate/translate_cache.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _TRANSLATE_CACHE_H
+#define _TRANSLATE_CACHE_H
+
+
+/*******************************************************************************
+ * Translate cache.
+ * Simply used to cache created translates.  Avoids unnecessary creation of
+ * translates if one suitable for a given translate_key has already been
+ * created.
+ *
+ * Note: this functionality depends on, and requires, the CSO module.
+ */
+struct translate_cache;
+
+struct translate_key;
+struct translate;
+
+struct translate_cache *translate_cache_create( void );
+void translate_cache_destroy(struct translate_cache *cache);
+
+/**
+ * Try to find a translate structure matching the given key.
+ * If no such structure exists in the cache, the function
+ * will automatically create it, insert it in the cache and
+ * return the newly created version.
+ */
+struct translate *translate_cache_find(struct translate_cache *cache,
+                                       struct translate_key *key);
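+
+/*
+ * A minimal usage sketch ('key' is assumed to be a filled-in
+ * translate_key):
+ *
+ *    struct translate_cache *cache = translate_cache_create();
+ *    struct translate *t = translate_cache_find(cache, &key);
+ *    ... use t->run() / t->run_elts() ...
+ *    translate_cache_destroy(cache);
+ *
+ * The cache owns the translate objects it hands out: destroying it
+ * releases them all, so callers must not call t->release() themselves.
+ */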
+
+#endif
diff --git a/drivers/video/Gallium/auxiliary/translate/translate_generic.c b/drivers/video/Gallium/auxiliary/translate/translate_generic.c
new file mode 100644
index 0000000000..96e35b0eb4
--- /dev/null
+++ b/drivers/video/Gallium/auxiliary/translate/translate_generic.c
@@ -0,0 +1,998 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_half.h"
+#include "util/u_math.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+typedef void (*fetch_func)(void *dst,
+                           const uint8_t *src,
+                           unsigned i, unsigned j);
+typedef void (*emit_func)(const void *attrib, void *ptr);
+
+
+
+struct translate_generic {
+   struct translate translate;
+
+   struct {
+      enum translate_element_type type;
+
+      fetch_func fetch;
+      unsigned buffer;
+      unsigned input_offset;
+      unsigned instance_divisor;
+
+      emit_func emit;
+      unsigned output_offset;
+
+      const uint8_t *input_ptr;
+      unsigned input_stride;
+      unsigned max_index;
+
+      /* set to -1 for a normal element with output_format != input_format:
+       * u_format is then used to do a full conversion.
+       *
+       * set to the format size in bytes if output_format == input_format,
+       * or for 32-bit instance ids: memcpy then copies that many bytes.
+       */
+      int copy_size;
+
+   } attrib[PIPE_MAX_ATTRIBS];
+
+   unsigned nr_attrib;
+};
+
+
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+   return (struct translate_generic *)translate;
+}
+
+/**
+ * Emit a dword[4] vertex attribute to memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/duplicated elsewhere, eg format
+ * conversion, texture sampling etc.
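+ *
+ * For example, ATTRIB( R8G8B8A8_UNORM, 4, float, ubyte, TO_8_UNORM )
+ * below expands to emit_R8G8B8A8_UNORM(), which scales four floats in
+ * [0,1] to four bytes in [0,255] and stores them at the output pointer.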
+ */ +#define ATTRIB( NAME, SZ, SRCTYPE, DSTTYPE, TO ) \ +static void \ +emit_##NAME(const void *attrib, void *ptr) \ +{ \ + unsigned i; \ + SRCTYPE *in = (SRCTYPE *)attrib; \ + DSTTYPE *out = (DSTTYPE *)ptr; \ + \ + for (i = 0; i < SZ; i++) { \ + out[i] = TO(in[i]); \ + } \ +} + + +#define TO_64_FLOAT(x) ((double) x) +#define TO_32_FLOAT(x) (x) +#define TO_16_FLOAT(x) util_float_to_half(x) + +#define TO_8_USCALED(x) ((unsigned char) x) +#define TO_16_USCALED(x) ((unsigned short) x) +#define TO_32_USCALED(x) ((unsigned int) x) + +#define TO_8_SSCALED(x) ((char) x) +#define TO_16_SSCALED(x) ((short) x) +#define TO_32_SSCALED(x) ((int) x) + +#define TO_8_UNORM(x) ((unsigned char) (x * 255.0f)) +#define TO_16_UNORM(x) ((unsigned short) (x * 65535.0f)) +#define TO_32_UNORM(x) ((unsigned int) (x * 4294967295.0f)) + +#define TO_8_SNORM(x) ((char) (x * 127.0f)) +#define TO_16_SNORM(x) ((short) (x * 32767.0f)) +#define TO_32_SNORM(x) ((int) (x * 2147483647.0f)) + +#define TO_32_FIXED(x) ((int) (x * 65536.0f)) + +#define TO_INT(x) (x) + + +ATTRIB( R64G64B64A64_FLOAT, 4, float, double, TO_64_FLOAT ) +ATTRIB( R64G64B64_FLOAT, 3, float, double, TO_64_FLOAT ) +ATTRIB( R64G64_FLOAT, 2, float, double, TO_64_FLOAT ) +ATTRIB( R64_FLOAT, 1, float, double, TO_64_FLOAT ) + +ATTRIB( R32G32B32A32_FLOAT, 4, float, float, TO_32_FLOAT ) +ATTRIB( R32G32B32_FLOAT, 3, float, float, TO_32_FLOAT ) +ATTRIB( R32G32_FLOAT, 2, float, float, TO_32_FLOAT ) +ATTRIB( R32_FLOAT, 1, float, float, TO_32_FLOAT ) + +ATTRIB( R16G16B16A16_FLOAT, 4, float, ushort, TO_16_FLOAT ) +ATTRIB( R16G16B16_FLOAT, 3, float, ushort, TO_16_FLOAT ) +ATTRIB( R16G16_FLOAT, 2, float, ushort, TO_16_FLOAT ) +ATTRIB( R16_FLOAT, 1, float, ushort, TO_16_FLOAT ) + +ATTRIB( R32G32B32A32_USCALED, 4, float, unsigned, TO_32_USCALED ) +ATTRIB( R32G32B32_USCALED, 3, float, unsigned, TO_32_USCALED ) +ATTRIB( R32G32_USCALED, 2, float, unsigned, TO_32_USCALED ) +ATTRIB( R32_USCALED, 1, float, unsigned, TO_32_USCALED ) + +ATTRIB( R32G32B32A32_SSCALED, 4, float, int, TO_32_SSCALED ) +ATTRIB( R32G32B32_SSCALED, 3, float, int, TO_32_SSCALED ) +ATTRIB( R32G32_SSCALED, 2, float, int, TO_32_SSCALED ) +ATTRIB( R32_SSCALED, 1, float, int, TO_32_SSCALED ) + +ATTRIB( R32G32B32A32_UNORM, 4, float, unsigned, TO_32_UNORM ) +ATTRIB( R32G32B32_UNORM, 3, float, unsigned, TO_32_UNORM ) +ATTRIB( R32G32_UNORM, 2, float, unsigned, TO_32_UNORM ) +ATTRIB( R32_UNORM, 1, float, unsigned, TO_32_UNORM ) + +ATTRIB( R32G32B32A32_SNORM, 4, float, int, TO_32_SNORM ) +ATTRIB( R32G32B32_SNORM, 3, float, int, TO_32_SNORM ) +ATTRIB( R32G32_SNORM, 2, float, int, TO_32_SNORM ) +ATTRIB( R32_SNORM, 1, float, int, TO_32_SNORM ) + +ATTRIB( R16G16B16A16_USCALED, 4, float, ushort, TO_16_USCALED ) +ATTRIB( R16G16B16_USCALED, 3, float, ushort, TO_16_USCALED ) +ATTRIB( R16G16_USCALED, 2, float, ushort, TO_16_USCALED ) +ATTRIB( R16_USCALED, 1, float, ushort, TO_16_USCALED ) + +ATTRIB( R16G16B16A16_SSCALED, 4, float, short, TO_16_SSCALED ) +ATTRIB( R16G16B16_SSCALED, 3, float, short, TO_16_SSCALED ) +ATTRIB( R16G16_SSCALED, 2, float, short, TO_16_SSCALED ) +ATTRIB( R16_SSCALED, 1, float, short, TO_16_SSCALED ) + +ATTRIB( R16G16B16A16_UNORM, 4, float, ushort, TO_16_UNORM ) +ATTRIB( R16G16B16_UNORM, 3, float, ushort, TO_16_UNORM ) +ATTRIB( R16G16_UNORM, 2, float, ushort, TO_16_UNORM ) +ATTRIB( R16_UNORM, 1, float, ushort, TO_16_UNORM ) + +ATTRIB( R16G16B16A16_SNORM, 4, float, short, TO_16_SNORM ) +ATTRIB( R16G16B16_SNORM, 3, float, short, TO_16_SNORM ) +ATTRIB( R16G16_SNORM, 2, float, short, TO_16_SNORM ) +ATTRIB( 
R16_SNORM, 1, float, short, TO_16_SNORM )
+
+ATTRIB( R8G8B8A8_USCALED, 4, float, ubyte, TO_8_USCALED )
+ATTRIB( R8G8B8_USCALED, 3, float, ubyte, TO_8_USCALED )
+ATTRIB( R8G8_USCALED, 2, float, ubyte, TO_8_USCALED )
+ATTRIB( R8_USCALED, 1, float, ubyte, TO_8_USCALED )
+
+ATTRIB( R8G8B8A8_SSCALED, 4, float, char, TO_8_SSCALED )
+ATTRIB( R8G8B8_SSCALED, 3, float, char, TO_8_SSCALED )
+ATTRIB( R8G8_SSCALED, 2, float, char, TO_8_SSCALED )
+ATTRIB( R8_SSCALED, 1, float, char, TO_8_SSCALED )
+
+ATTRIB( R8G8B8A8_UNORM, 4, float, ubyte, TO_8_UNORM )
+ATTRIB( R8G8B8_UNORM, 3, float, ubyte, TO_8_UNORM )
+ATTRIB( R8G8_UNORM, 2, float, ubyte, TO_8_UNORM )
+ATTRIB( R8_UNORM, 1, float, ubyte, TO_8_UNORM )
+
+ATTRIB( R8G8B8A8_SNORM, 4, float, char, TO_8_SNORM )
+ATTRIB( R8G8B8_SNORM, 3, float, char, TO_8_SNORM )
+ATTRIB( R8G8_SNORM, 2, float, char, TO_8_SNORM )
+ATTRIB( R8_SNORM, 1, float, char, TO_8_SNORM )
+
+ATTRIB( R32G32B32A32_UINT, 4, uint32_t, unsigned, TO_INT )
+ATTRIB( R32G32B32_UINT, 3, uint32_t, unsigned, TO_INT )
+ATTRIB( R32G32_UINT, 2, uint32_t, unsigned, TO_INT )
+ATTRIB( R32_UINT, 1, uint32_t, unsigned, TO_INT )
+
+ATTRIB( R16G16B16A16_UINT, 4, uint32_t, ushort, TO_INT )
+ATTRIB( R16G16B16_UINT, 3, uint32_t, ushort, TO_INT )
+ATTRIB( R16G16_UINT, 2, uint32_t, ushort, TO_INT )
+ATTRIB( R16_UINT, 1, uint32_t, ushort, TO_INT )
+
+ATTRIB( R8G8B8A8_UINT, 4, uint32_t, ubyte, TO_INT )
+ATTRIB( R8G8B8_UINT, 3, uint32_t, ubyte, TO_INT )
+ATTRIB( R8G8_UINT, 2, uint32_t, ubyte, TO_INT )
+ATTRIB( R8_UINT, 1, uint32_t, ubyte, TO_INT )
+
+ATTRIB( R32G32B32A32_SINT, 4, int32_t, int, TO_INT )
+ATTRIB( R32G32B32_SINT, 3, int32_t, int, TO_INT )
+ATTRIB( R32G32_SINT, 2, int32_t, int, TO_INT )
+ATTRIB( R32_SINT, 1, int32_t, int, TO_INT )
+
+ATTRIB( R16G16B16A16_SINT, 4, int32_t, short, TO_INT )
+ATTRIB( R16G16B16_SINT, 3, int32_t, short, TO_INT )
+ATTRIB( R16G16_SINT, 2, int32_t, short, TO_INT )
+ATTRIB( R16_SINT, 1, int32_t, short, TO_INT )
+
+ATTRIB( R8G8B8A8_SINT, 4, int32_t, char, TO_INT )
+ATTRIB( R8G8B8_SINT, 3, int32_t, char, TO_INT )
+ATTRIB( R8G8_SINT, 2, int32_t, char, TO_INT )
+ATTRIB( R8_SINT, 1, int32_t, char, TO_INT )
+
+static void
+emit_A8R8G8B8_UNORM( const void *attrib, void *ptr)
+{
+   float *in = (float *)attrib;
+   ubyte *out = (ubyte *)ptr;
+   out[0] = TO_8_UNORM(in[3]);
+   out[1] = TO_8_UNORM(in[0]);
+   out[2] = TO_8_UNORM(in[1]);
+   out[3] = TO_8_UNORM(in[2]);
+}
+
+static void
+emit_B8G8R8A8_UNORM( const void *attrib, void *ptr)
+{
+   float *in = (float *)attrib;
+   ubyte *out = (ubyte *)ptr;
+   out[2] = TO_8_UNORM(in[0]);
+   out[1] = TO_8_UNORM(in[1]);
+   out[0] = TO_8_UNORM(in[2]);
+   out[3] = TO_8_UNORM(in[3]);
+}
+
+static void
+emit_B10G10R10A2_UNORM( const void *attrib, void *ptr )
+{
+   /* as in the ATTRIB emitters above, attrib is the incoming float[4]
+    * and ptr receives the packed dword
+    */
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= ((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff;
+   value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10;
+   value |= (((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff) << 20;
+   value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_B10G10R10A2_USCALED( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= ((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff;
+   value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10;
+   value |= (((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff) << 20;
+   value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
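+
+/* The B10G10R10A2/R10G10B10A2 packers above and below all follow the
+ * same pattern: clamp each float component, convert it to a 10-bit
+ * integer field (2 bits for alpha) and pack the fields into a single
+ * dword, low bits first.  For B10G10R10A2_UNORM, for instance,
+ * (r,g,b,a) = (1.0, 0.5, 0.0, 1.0) packs as
+ * (3 << 30) | (0x3ff << 20) | (511 << 10) | 0 = 0xfff7fc00.
+ */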
+
+static void
+emit_B10G10R10A2_SNORM( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= (uint32_t)(((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff);
+   value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10);
+   value |= (uint32_t)((((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff) << 20);
+   value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_B10G10R10A2_SSCALED( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= (uint32_t)(((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff);
+   value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10);
+   value |= (uint32_t)((((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff) << 20);
+   value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_R10G10B10A2_UNORM( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= ((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff;
+   value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10;
+   value |= (((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff) << 20;
+   value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_R10G10B10A2_USCALED( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= ((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff;
+   value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10;
+   value |= (((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff) << 20;
+   value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30;
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_R10G10B10A2_SNORM( const void *attrib, void *ptr )
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= (uint32_t)(((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff);
+   value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10);
+   value |= (uint32_t)((((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff) << 20);
+   value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_R10G10B10A2_SSCALED( const void *attrib, void *ptr)
+{
+   float *src = (float *)attrib;
+   uint32_t value = 0;
+   value |= (uint32_t)(((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff);
+   value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10);
+   value |= (uint32_t)((((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff) << 20);
+   value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30);
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   value = util_bswap32(value);
+#endif
+   *(uint32_t *)ptr = value;
+}
+
+static void
+emit_NULL( const void *attrib, void *ptr )
+{
+   /* do nothing is the only sensible option */
+}
+
+static emit_func get_emit_func( enum pipe_format format )
+{
+   switch (format) {
+   case PIPE_FORMAT_R64_FLOAT:
+      return &emit_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return &emit_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return &emit_R64G64B64_FLOAT;
+   case
PIPE_FORMAT_R64G64B64A64_FLOAT: + return &emit_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return &emit_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return &emit_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return &emit_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return &emit_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R16_FLOAT: + return &emit_R16_FLOAT; + case PIPE_FORMAT_R16G16_FLOAT: + return &emit_R16G16_FLOAT; + case PIPE_FORMAT_R16G16B16_FLOAT: + return &emit_R16G16B16_FLOAT; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return &emit_R16G16B16A16_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return &emit_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return &emit_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return &emit_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return &emit_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return &emit_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return &emit_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return &emit_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return &emit_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return &emit_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return &emit_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return &emit_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return &emit_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return &emit_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return &emit_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return &emit_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return &emit_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return &emit_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return &emit_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return &emit_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return &emit_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return &emit_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return &emit_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return &emit_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return &emit_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return &emit_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return &emit_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return &emit_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return &emit_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return &emit_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return &emit_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return &emit_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return &emit_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return &emit_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return &emit_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return &emit_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return &emit_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return &emit_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return &emit_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return &emit_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return &emit_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return &emit_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return &emit_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return &emit_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return &emit_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return &emit_R8_SSCALED; + 
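+
+   /* A sketch of how this table is meant to be used (generic_run_one()
+    * further down is the real caller): fetch the attribute into a
+    * float[4] with the format's fetch function, then emit it in the
+    * output format:
+    *
+    *    float data[4];
+    *    tg->attrib[attr].fetch( data, src, 0, 0 );
+    *    get_emit_func( output_format )( data, dst );
+    */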
case PIPE_FORMAT_R8G8_SSCALED: + return &emit_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return &emit_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return &emit_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return &emit_B8G8R8A8_UNORM; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return &emit_A8R8G8B8_UNORM; + + case PIPE_FORMAT_R32_UINT: + return &emit_R32_UINT; + case PIPE_FORMAT_R32G32_UINT: + return &emit_R32G32_UINT; + case PIPE_FORMAT_R32G32B32_UINT: + return &emit_R32G32B32_UINT; + case PIPE_FORMAT_R32G32B32A32_UINT: + return &emit_R32G32B32A32_UINT; + + case PIPE_FORMAT_R16_UINT: + return &emit_R16_UINT; + case PIPE_FORMAT_R16G16_UINT: + return &emit_R16G16_UINT; + case PIPE_FORMAT_R16G16B16_UINT: + return &emit_R16G16B16_UINT; + case PIPE_FORMAT_R16G16B16A16_UINT: + return &emit_R16G16B16A16_UINT; + + case PIPE_FORMAT_R8_UINT: + return &emit_R8_UINT; + case PIPE_FORMAT_R8G8_UINT: + return &emit_R8G8_UINT; + case PIPE_FORMAT_R8G8B8_UINT: + return &emit_R8G8B8_UINT; + case PIPE_FORMAT_R8G8B8A8_UINT: + return &emit_R8G8B8A8_UINT; + + case PIPE_FORMAT_R32_SINT: + return &emit_R32_SINT; + case PIPE_FORMAT_R32G32_SINT: + return &emit_R32G32_SINT; + case PIPE_FORMAT_R32G32B32_SINT: + return &emit_R32G32B32_SINT; + case PIPE_FORMAT_R32G32B32A32_SINT: + return &emit_R32G32B32A32_SINT; + + case PIPE_FORMAT_R16_SINT: + return &emit_R16_SINT; + case PIPE_FORMAT_R16G16_SINT: + return &emit_R16G16_SINT; + case PIPE_FORMAT_R16G16B16_SINT: + return &emit_R16G16B16_SINT; + case PIPE_FORMAT_R16G16B16A16_SINT: + return &emit_R16G16B16A16_SINT; + + case PIPE_FORMAT_R8_SINT: + return &emit_R8_SINT; + case PIPE_FORMAT_R8G8_SINT: + return &emit_R8G8_SINT; + case PIPE_FORMAT_R8G8B8_SINT: + return &emit_R8G8B8_SINT; + case PIPE_FORMAT_R8G8B8A8_SINT: + return &emit_R8G8B8A8_SINT; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return &emit_B10G10R10A2_UNORM; + case PIPE_FORMAT_B10G10R10A2_USCALED: + return &emit_B10G10R10A2_USCALED; + case PIPE_FORMAT_B10G10R10A2_SNORM: + return &emit_B10G10R10A2_SNORM; + case PIPE_FORMAT_B10G10R10A2_SSCALED: + return &emit_B10G10R10A2_SSCALED; + + case PIPE_FORMAT_R10G10B10A2_UNORM: + return &emit_R10G10B10A2_UNORM; + case PIPE_FORMAT_R10G10B10A2_USCALED: + return &emit_R10G10B10A2_USCALED; + case PIPE_FORMAT_R10G10B10A2_SNORM: + return &emit_R10G10B10A2_SNORM; + case PIPE_FORMAT_R10G10B10A2_SSCALED: + return &emit_R10G10B10A2_SSCALED; + + default: + assert(0); + return &emit_NULL; + } +} + +static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg, + unsigned elt, + unsigned start_instance, + unsigned instance_id, + void *vert ) +{ + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset; + + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + const uint8_t *src; + unsigned index; + int copy_size; + + if (tg->attrib[attr].instance_divisor) { + index = start_instance; + index += (instance_id - start_instance) / + tg->attrib[attr].instance_divisor; + /* XXX we need to clamp the index here too, but to a + * per-array max value, not the draw->pt.max_index value + * that's being given to us via translate->set_buffer(). 
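+             *
+             * For illustration (numbers assumed): with start_instance 2,
+             * instance_id 10 and instance_divisor 3, the code above yields
+             * index = 2 + (10 - 2) / 3 = 4, so instances 8, 9 and 10 all
+             * fetch array element 4.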
+             */
+         }
+         else {
+            index = elt;
+
+            /* clamp to avoid going out of bounds */
+            index = MIN2(index, tg->attrib[attr].max_index);
+         }
+
+         src = tg->attrib[attr].input_ptr +
+               tg->attrib[attr].input_stride * index;
+
+         copy_size = tg->attrib[attr].copy_size;
+         if(likely(copy_size >= 0))
+            memcpy(dst, src, copy_size);
+         else
+         {
+            tg->attrib[attr].fetch( data, src, 0, 0 );
+
+            if (0)
+               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+                            " %f, %f, %f, %f \n",
+                            attr,
+                            tg->attrib[attr].input_ptr,
+                            tg->attrib[attr].input_stride,
+                            index,
+                            data[0], data[1], data[2], data[3]);
+
+            tg->attrib[attr].emit( data, dst );
+         }
+      } else {
+         if(likely(tg->attrib[attr].copy_size >= 0))
+            memcpy(dst, &instance_id, 4);
+         else
+         {
+            data[0] = (float)instance_id;
+            tg->attrib[attr].emit( data, dst );
+         }
+      }
+   }
+}
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+                                         const unsigned *elts,
+                                         unsigned count,
+                                         unsigned start_instance,
+                                         unsigned instance_id,
+                                         void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, start_instance, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+                                           const uint16_t *elts,
+                                           unsigned count,
+                                           unsigned start_instance,
+                                           unsigned instance_id,
+                                           void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, start_instance, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+                                          const uint8_t *elts,
+                                          unsigned count,
+                                          unsigned start_instance,
+                                          unsigned instance_id,
+                                          void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, start_instance, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+static void PIPE_CDECL generic_run( struct translate *translate,
+                                    unsigned start,
+                                    unsigned count,
+                                    unsigned start_instance,
+                                    unsigned instance_id,
+                                    void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, start + i, start_instance, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+static void generic_set_buffer( struct translate *translate,
+                                unsigned buf,
+                                const void *ptr,
+                                unsigned stride,
+                                unsigned max_index )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   unsigned i;
+
+   for (i = 0; i < tg->nr_attrib; i++) {
+      if (tg->attrib[i].buffer == buf) {
+         tg->attrib[i].input_ptr = ((const uint8_t *)ptr +
+                                    tg->attrib[i].input_offset);
+         tg->attrib[i].input_stride = stride;
+         tg->attrib[i].max_index = max_index;
+      }
+   }
+}
+
+
+static void generic_release( struct translate *translate )
+{
+   /* Refcount?
+    */
+   FREE(translate);
+}
+
+static boolean
+is_legal_int_format_combo( const struct util_format_description *src,
+                           const struct util_format_description *dst )
+{
+   unsigned i;
+   unsigned nr = MIN2(src->nr_channels, dst->nr_channels);
+
+   for (i = 0; i < nr; i++) {
+      /* The signs must match.
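+       * For example, R8_UINT -> R16_UINT widens without loss and is
+       * accepted, while R16_UINT -> R8_UINT would truncate and
+       * R8_SINT -> R8_UINT would reinterpret the sign, so both are
+       * rejected by the checks below.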
*/ + if (src->channel[i].type != dst->channel[i].type) { + return FALSE; + } + + /* Integers must not lose precision at any point in the pipeline. */ + if (src->channel[i].size > dst->channel[i].size) { + return FALSE; + } + } + return TRUE; +} + +struct translate *translate_generic_create( const struct translate_key *key ) +{ + struct translate_generic *tg = CALLOC_STRUCT(translate_generic); + unsigned i; + + if (tg == NULL) + return NULL; + + tg->translate.key = *key; + tg->translate.release = generic_release; + tg->translate.set_buffer = generic_set_buffer; + tg->translate.run_elts = generic_run_elts; + tg->translate.run_elts16 = generic_run_elts16; + tg->translate.run_elts8 = generic_run_elts8; + tg->translate.run = generic_run; + + for (i = 0; i < key->nr_elements; i++) { + const struct util_format_description *format_desc = + util_format_description(key->element[i].input_format); + + assert(format_desc); + + tg->attrib[i].type = key->element[i].type; + + if (format_desc->channel[0].pure_integer) { + const struct util_format_description *out_format_desc = + util_format_description(key->element[i].output_format); + + if (!is_legal_int_format_combo(format_desc, out_format_desc)) { + FREE(tg); + return NULL; + } + + if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + assert(format_desc->fetch_rgba_sint); + tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_sint; + } else { + assert(format_desc->fetch_rgba_uint); + tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_uint; + } + } else { + assert(format_desc->fetch_rgba_float); + tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_float; + } + + tg->attrib[i].buffer = key->element[i].input_buffer; + tg->attrib[i].input_offset = key->element[i].input_offset; + tg->attrib[i].instance_divisor = key->element[i].instance_divisor; + + tg->attrib[i].output_offset = key->element[i].output_offset; + + tg->attrib[i].copy_size = -1; + if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID) + { + if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED + || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED) + tg->attrib[i].copy_size = 4; + } + else + { + if(key->element[i].input_format == key->element[i].output_format + && format_desc->block.width == 1 + && format_desc->block.height == 1 + && !(format_desc->block.bits & 7)) + tg->attrib[i].copy_size = format_desc->block.bits >> 3; + } + + if(tg->attrib[i].copy_size < 0) + tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + else + tg->attrib[i].emit = NULL; + } + + tg->nr_attrib = key->nr_elements; + + + return &tg->translate; +} + +boolean translate_generic_is_output_format_supported(enum pipe_format format) +{ + switch(format) + { + case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE; + case PIPE_FORMAT_R64G64_FLOAT: return TRUE; + case PIPE_FORMAT_R64_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE; + case PIPE_FORMAT_R32G32_FLOAT: return TRUE; + case PIPE_FORMAT_R32_FLOAT: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_FLOAT: return TRUE; + case PIPE_FORMAT_R16G16B16_FLOAT: return TRUE; + case PIPE_FORMAT_R16G16_FLOAT: return TRUE; + case PIPE_FORMAT_R16_FLOAT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32B32_USCALED: return TRUE; + case PIPE_FORMAT_R32G32_USCALED: return TRUE; + case PIPE_FORMAT_R32_USCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SSCALED: return 
TRUE; + case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE; + case PIPE_FORMAT_R32G32_SSCALED: return TRUE; + case PIPE_FORMAT_R32_SSCALED: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_UNORM: return TRUE; + case PIPE_FORMAT_R32G32_UNORM: return TRUE; + case PIPE_FORMAT_R32_UNORM: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32B32_SNORM: return TRUE; + case PIPE_FORMAT_R32G32_SNORM: return TRUE; + case PIPE_FORMAT_R32_SNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_USCALED: return TRUE; + case PIPE_FORMAT_R16G16_USCALED: return TRUE; + case PIPE_FORMAT_R16_USCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE; + case PIPE_FORMAT_R16G16_SSCALED: return TRUE; + case PIPE_FORMAT_R16_SSCALED: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_UNORM: return TRUE; + case PIPE_FORMAT_R16G16_UNORM: return TRUE; + case PIPE_FORMAT_R16_UNORM: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16B16_SNORM: return TRUE; + case PIPE_FORMAT_R16G16_SNORM: return TRUE; + case PIPE_FORMAT_R16_SNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_USCALED: return TRUE; + case PIPE_FORMAT_R8G8_USCALED: return TRUE; + case PIPE_FORMAT_R8_USCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE; + case PIPE_FORMAT_R8G8_SSCALED: return TRUE; + case PIPE_FORMAT_R8_SSCALED: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_R8G8_UNORM: return TRUE; + case PIPE_FORMAT_R8_UNORM: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8B8_SNORM: return TRUE; + case PIPE_FORMAT_R8G8_SNORM: return TRUE; + case PIPE_FORMAT_R8_SNORM: return TRUE; + + case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE; + case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_UINT: return TRUE; + case PIPE_FORMAT_R32G32B32_UINT: return TRUE; + case PIPE_FORMAT_R32G32_UINT: return TRUE; + case PIPE_FORMAT_R32_UINT: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_UINT: return TRUE; + case PIPE_FORMAT_R16G16B16_UINT: return TRUE; + case PIPE_FORMAT_R16G16_UINT: return TRUE; + case PIPE_FORMAT_R16_UINT: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_UINT: return TRUE; + case PIPE_FORMAT_R8G8B8_UINT: return TRUE; + case PIPE_FORMAT_R8G8_UINT: return TRUE; + case PIPE_FORMAT_R8_UINT: return TRUE; + + case PIPE_FORMAT_R32G32B32A32_SINT: return TRUE; + case PIPE_FORMAT_R32G32B32_SINT: return TRUE; + case PIPE_FORMAT_R32G32_SINT: return TRUE; + case PIPE_FORMAT_R32_SINT: return TRUE; + + case PIPE_FORMAT_R16G16B16A16_SINT: return TRUE; + case PIPE_FORMAT_R16G16B16_SINT: return TRUE; + case PIPE_FORMAT_R16G16_SINT: return TRUE; + case PIPE_FORMAT_R16_SINT: return TRUE; + + case PIPE_FORMAT_R8G8B8A8_SINT: return TRUE; + case PIPE_FORMAT_R8G8B8_SINT: return TRUE; + case PIPE_FORMAT_R8G8_SINT: return TRUE; + case PIPE_FORMAT_R8_SINT: return TRUE; + + case PIPE_FORMAT_B10G10R10A2_UNORM: return TRUE; + case PIPE_FORMAT_B10G10R10A2_USCALED: return TRUE; + case PIPE_FORMAT_B10G10R10A2_SNORM: return TRUE; + case PIPE_FORMAT_B10G10R10A2_SSCALED: return TRUE; + + case PIPE_FORMAT_R10G10B10A2_UNORM: return TRUE; + case 
PIPE_FORMAT_R10G10B10A2_USCALED: return TRUE; + case PIPE_FORMAT_R10G10B10A2_SNORM: return TRUE; + case PIPE_FORMAT_R10G10B10A2_SSCALED: return TRUE; + + default: return FALSE; + } +} diff --git a/drivers/video/Gallium/auxiliary/translate/translate_sse.c b/drivers/video/Gallium/auxiliary/translate/translate_sse.c new file mode 100644 index 0000000000..a4f7b243c1 --- /dev/null +++ b/drivers/video/Gallium/auxiliary/translate/translate_sse.c @@ -0,0 +1,1573 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_format.h" + +#include "translate.h" + + +#if (defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__))) && !defined(PIPE_SUBSYSTEM_EMBEDDED) + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +struct translate_buffer { + const void *base_ptr; + uintptr_t stride; + unsigned max_index; +}; + +struct translate_buffer_variant { + unsigned buffer_index; + unsigned instance_divisor; + void *ptr; /* updated either per vertex or per instance */ +}; + + +#define ELEMENT_BUFFER_INSTANCE_ID 1001 + +#define NUM_CONSTS 7 + +enum +{ + CONST_IDENTITY, + CONST_INV_127, + CONST_INV_255, + CONST_INV_32767, + CONST_INV_65535, + CONST_INV_2147483647, + CONST_255 +}; + +#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)} +static float consts[NUM_CONSTS][4] = { + {0, 0, 0, 1}, + C(1.0 / 127.0), + C(1.0 / 255.0), + C(1.0 / 32767.0), + C(1.0 / 65535.0), + C(1.0 / 2147483647.0), + C(255.0) +}; +#undef C + +struct translate_sse { + struct translate translate; + + struct x86_function linear_func; + struct x86_function elt_func; + struct x86_function elt16_func; + struct x86_function elt8_func; + struct x86_function *func; + + PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4]; + int8_t reg_to_const[16]; + int8_t const_to_reg[NUM_CONSTS]; + + struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_buffers; + + /* Multiple buffer variants can map to a single buffer. */ + struct translate_buffer_variant buffer_variant[PIPE_MAX_ATTRIBS]; + unsigned nr_buffer_variants; + + /* Multiple elements can map to a single buffer variant. 
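+    * For example (layout assumed): two per-vertex elements reading
+    * interleaved position and normal from buffer 0 with instance_divisor
+    * 0 share one variant, while an element reading buffer 0 with
+    * divisor 1 needs a second variant, since its pointer advances per
+    * instance rather than per vertex.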
*/ + unsigned element_to_buffer_variant[PIPE_MAX_ATTRIBS]; + + boolean use_instancing; + unsigned instance_id; + unsigned start_instance; + + /* these are actually known values, but putting them in a struct + * like this is helpful to keep them in sync across the file. + */ + struct x86_reg tmp_EAX; + struct x86_reg tmp2_EDX; + struct x86_reg src_ECX; + struct x86_reg idx_ESI; /* either start+i or &elt[i] */ + struct x86_reg machine_EDI; + struct x86_reg outbuf_EBX; + struct x86_reg count_EBP; /* decrements to zero */ +}; + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + +static struct x86_reg get_const( struct translate_sse *p, unsigned id) +{ + struct x86_reg reg; + unsigned i; + + if(p->const_to_reg[id] >= 0) + return x86_make_reg(file_XMM, p->const_to_reg[id]); + + for(i = 2; i < 8; ++i) + { + if(p->reg_to_const[i] < 0) + break; + } + + /* TODO: be smarter here */ + if(i == 8) + --i; + + reg = x86_make_reg(file_XMM, i); + + if(p->reg_to_const[i] >= 0) + p->const_to_reg[p->reg_to_const[i]] = -1; + + p->reg_to_const[i] = id; + p->const_to_reg[id] = i; + + /* TODO: this should happen outside the loop, if possible */ + sse_movaps(p->func, reg, + x86_make_disp(p->machine_EDI, + get_offset(p, &p->consts[id][0]))); + + return reg; +} + +/* load the data in a SSE2 register, padding with zeros */ +static boolean emit_load_sse2( struct translate_sse *p, + struct x86_reg data, + struct x86_reg src, + unsigned size) +{ + struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1); + struct x86_reg tmp = p->tmp_EAX; + switch(size) + { + case 1: + x86_movzx8(p->func, tmp, src); + sse2_movd(p->func, data, tmp); + break; + case 2: + x86_movzx16(p->func, tmp, src); + sse2_movd(p->func, data, tmp); + break; + case 3: + x86_movzx8(p->func, tmp, x86_make_disp(src, 2)); + x86_shl_imm(p->func, tmp, 16); + x86_mov16(p->func, tmp, src); + sse2_movd(p->func, data, tmp); + break; + case 4: + sse2_movd(p->func, data, src); + break; + case 6: + sse2_movd(p->func, data, src); + x86_movzx16(p->func, tmp, x86_make_disp(src, 4)); + sse2_movd(p->func, tmpXMM, tmp); + sse2_punpckldq(p->func, data, tmpXMM); + break; + case 8: + sse2_movq(p->func, data, src); + break; + case 12: + sse2_movq(p->func, data, src); + sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8)); + sse2_punpcklqdq(p->func, data, tmpXMM); + break; + case 16: + sse2_movdqu(p->func, data, src); + break; + default: + return FALSE; + } + return TRUE; +} + +/* this value can be passed for the out_chans argument */ +#define CHANNELS_0001 5 + +/* this function will load #chans float values, and will + * pad the register with zeroes at least up to out_chans. + * + * If out_chans is set to CHANNELS_0001, then the fourth + * value will be padded with 1. Only pass this value if + * chans < 4 or results are undefined. 
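+ *
+ * For instance, chans = 2 with out_chans = 4 yields (a, b, 0, 0) in the
+ * register, while chans = 2 with out_chans = CHANNELS_0001 yields
+ * (a, b, 0, 1).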
+ */ +static void emit_load_float32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0, + unsigned out_chans, + unsigned chans) +{ + switch(chans) + { + case 1: + /* a 0 0 0 + * a 0 0 1 + */ + sse_movss(p->func, data, arg0); + if(out_chans == CHANNELS_0001) + sse_orps(p->func, data, get_const(p, CONST_IDENTITY) ); + break; + case 2: + /* 0 0 0 1 + * a b 0 1 + */ + if(out_chans == CHANNELS_0001) + sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); + else if(out_chans > 2) + sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) ); + sse_movlps(p->func, data, arg0); + break; + case 3: + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 if out_chans == CHANNELS_0001 + * 0 0 c 0/1 + * a b c 0/1 + */ + sse_movss(p->func, data, x86_make_disp(arg0, 8)); + if(out_chans == CHANNELS_0001) + sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) ); + sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); + sse_movlps(p->func, data, arg0); + break; + case 4: + sse_movups(p->func, data, arg0); + break; + } +} + +/* this function behaves like emit_load_float32, but loads + 64-bit floating point numbers, converting them to 32-bit + ones */ +static void emit_load_float64to32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0, + unsigned out_chans, + unsigned chans) +{ + struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1); + switch(chans) + { + case 1: + sse2_movsd(p->func, data, arg0); + if(out_chans > 1) + sse2_cvtpd2ps(p->func, data, data); + else + sse2_cvtsd2ss(p->func, data, data); + if(out_chans == CHANNELS_0001) + sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); + break; + case 2: + sse2_movupd(p->func, data, arg0); + sse2_cvtpd2ps(p->func, data, data); + if(out_chans == CHANNELS_0001) + sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) ); + else if(out_chans > 2) + sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) ); + break; + case 3: + sse2_movupd(p->func, data, arg0); + sse2_cvtpd2ps(p->func, data, data); + sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16)); + if(out_chans > 3) + sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); + else + sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM); + sse_movlhps(p->func, data, tmpXMM); + if(out_chans == CHANNELS_0001) + sse_orps(p->func, data, get_const(p, CONST_IDENTITY) ); + break; + case 4: + sse2_movupd(p->func, data, arg0); + sse2_cvtpd2ps(p->func, data, data); + sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16)); + sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM); + sse_movlhps(p->func, data, tmpXMM); + break; + } +} + +static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm) +{ + if(x86_target(p->func) != X86_32) + x64_mov64(p->func, dst_gpr, src_gpr); + else + { + /* TODO: when/on which CPUs is SSE2 actually better than SSE? 
*/ + if(x86_target_caps(p->func) & X86_SSE2) + sse2_movq(p->func, dst_xmm, src_xmm); + else + sse_movlps(p->func, dst_xmm, src_xmm); + } +} + +static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src) +{ + emit_mov64(p, dst_gpr, dst_xmm, src, src); +} + +static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm) +{ + emit_mov64(p, dst, dst, src_gpr, src_xmm); +} + +static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src) +{ + if(x86_target_caps(p->func) & X86_SSE2) + sse2_movdqu(p->func, dst, src); + else + sse_movups(p->func, dst, src); +} + +/* TODO: this uses unaligned accesses liberally, which is great on Nehalem, + * but may or may not be good on older processors + * TODO: may perhaps want to use non-temporal stores here if possible + */ +static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size) +{ + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1); + struct x86_reg dataGPR = p->tmp_EAX; + struct x86_reg dataGPR2 = p->tmp2_EDX; + + if(size < 8) + { + switch (size) + { + case 1: + x86_mov8(p->func, dataGPR, src); + x86_mov8(p->func, dst, dataGPR); + break; + case 2: + x86_mov16(p->func, dataGPR, src); + x86_mov16(p->func, dst, dataGPR); + break; + case 3: + x86_mov16(p->func, dataGPR, src); + x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2)); + x86_mov16(p->func, dst, dataGPR); + x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2); + break; + case 4: + x86_mov(p->func, dataGPR, src); + x86_mov(p->func, dst, dataGPR); + break; + case 6: + x86_mov(p->func, dataGPR, src); + x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4)); + x86_mov(p->func, dst, dataGPR); + x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2); + break; + } + } + else if(!(x86_target_caps(p->func) & X86_SSE)) + { + unsigned i = 0; + assert((size & 3) == 0); + for(i = 0; i < size; i += 4) + { + x86_mov(p->func, dataGPR, x86_make_disp(src, i)); + x86_mov(p->func, x86_make_disp(dst, i), dataGPR); + } + } + else + { + switch(size) + { + case 8: + emit_load64(p, dataGPR, dataXMM, src); + emit_store64(p, dst, dataGPR, dataXMM); + break; + case 12: + emit_load64(p, dataGPR2, dataXMM, src); + x86_mov(p->func, dataGPR, x86_make_disp(src, 8)); + emit_store64(p, dst, dataGPR2, dataXMM); + x86_mov(p->func, x86_make_disp(dst, 8), dataGPR); + break; + case 16: + emit_mov128(p, dataXMM, src); + emit_mov128(p, dst, dataXMM); + break; + case 24: + emit_mov128(p, dataXMM, src); + emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16)); + emit_mov128(p, dst, dataXMM); + emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2); + break; + case 32: + emit_mov128(p, dataXMM, src); + emit_mov128(p, dataXMM2, x86_make_disp(src, 16)); + emit_mov128(p, dst, dataXMM); + emit_mov128(p, x86_make_disp(dst, 16), dataXMM2); + break; + default: + assert(0); + } + } +} + +static boolean translate_attr_convert( struct translate_sse *p, + const struct translate_element *a, + struct x86_reg src, + struct x86_reg dst) + +{ + const struct util_format_description* input_desc = util_format_description(a->input_format); + const struct util_format_description* output_desc = util_format_description(a->output_format); + unsigned i; + boolean id_swizzle = TRUE; + unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE}; + unsigned needed_chans 
= 0; + unsigned imms[2] = {0, 0x3f800000}; + + if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE) + return FALSE; + + if(input_desc->channel[0].size & 7) + return FALSE; + + if(input_desc->colorspace != output_desc->colorspace) + return FALSE; + + for(i = 1; i < input_desc->nr_channels; ++i) + { + if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0]))) + return FALSE; + } + + for(i = 1; i < output_desc->nr_channels; ++i) + { + if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0]))) + return FALSE; + } + + for(i = 0; i < output_desc->nr_channels; ++i) + { + if(output_desc->swizzle[i] < 4) + swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i]; + } + + if((x86_target_caps(p->func) & X86_SSE) && (0 + || a->output_format == PIPE_FORMAT_R32_FLOAT + || a->output_format == PIPE_FORMAT_R32G32_FLOAT + || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT + || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT)) + { + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + for(i = 0; i < output_desc->nr_channels; ++i) + { + if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels) + swizzle[i] = i; + } + + for(i = 0; i < output_desc->nr_channels; ++i) + { + if(swizzle[i] < 4) + needed_chans = MAX2(needed_chans, swizzle[i] + 1); + if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i) + id_swizzle = FALSE; + } + + if(needed_chans > 0) + { + switch(input_desc->channel[0].type) + { + case UTIL_FORMAT_TYPE_UNSIGNED: + if(!(x86_target_caps(p->func) & X86_SSE2)) + return FALSE; + emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); + + /* TODO: add support for SSE4.1 pmovzx */ + switch(input_desc->channel[0].size) + { + case 8: + /* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */ + sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); + sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); + break; + case 16: + sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY)); + break; + case 32: /* we lose precision here */ + sse2_psrld_imm(p->func, dataXMM, 1); + break; + default: + return FALSE; + } + sse2_cvtdq2ps(p->func, dataXMM, dataXMM); + if(input_desc->channel[0].normalized) + { + struct x86_reg factor; + switch(input_desc->channel[0].size) + { + case 8: + factor = get_const(p, CONST_INV_255); + break; + case 16: + factor = get_const(p, CONST_INV_65535); + break; + case 32: + factor = get_const(p, CONST_INV_2147483647); + break; + default: + assert(0); + factor.disp = 0; + factor.file = 0; + factor.idx = 0; + factor.mod = 0; + break; + } + sse_mulps(p->func, dataXMM, factor); + } + else if(input_desc->channel[0].size == 32) + sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */ + break; + case UTIL_FORMAT_TYPE_SIGNED: + if(!(x86_target_caps(p->func) & X86_SSE2)) + return FALSE; + emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); + + /* TODO: add support for SSE4.1 pmovsx */ + switch(input_desc->channel[0].size) + { + case 8: + sse2_punpcklbw(p->func, dataXMM, dataXMM); + sse2_punpcklbw(p->func, dataXMM, dataXMM); + sse2_psrad_imm(p->func, dataXMM, 24); + break; + case 16: + sse2_punpcklwd(p->func, dataXMM, dataXMM); + sse2_psrad_imm(p->func, dataXMM, 16); + break; + case 32: /* we lose precision here */ + break; + default: + return FALSE; + } + sse2_cvtdq2ps(p->func, 
dataXMM, dataXMM);
+            if(input_desc->channel[0].normalized)
+            {
+               struct x86_reg factor;
+               switch(input_desc->channel[0].size)
+               {
+               case 8:
+                  factor = get_const(p, CONST_INV_127);
+                  break;
+               case 16:
+                  factor = get_const(p, CONST_INV_32767);
+                  break;
+               case 32:
+                  factor = get_const(p, CONST_INV_2147483647);
+                  break;
+               default:
+                  assert(0);
+                  factor.disp = 0;
+                  factor.file = 0;
+                  factor.idx = 0;
+                  factor.mod = 0;
+                  break;
+               }
+               sse_mulps(p->func, dataXMM, factor);
+            }
+            break;
+         case UTIL_FORMAT_TYPE_FLOAT:
+            if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+               return FALSE;
+            if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+            {
+               swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+               needed_chans = CHANNELS_0001;
+            }
+            switch(input_desc->channel[0].size)
+            {
+            case 32:
+               emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+               break;
+            case 64: /* we lose precision here */
+               if(!(x86_target_caps(p->func) & X86_SSE2))
+                  return FALSE;
+               emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+               break;
+            default:
+               return FALSE;
+            }
+            break;
+         default:
+            return FALSE;
+         }
+
+         if(!id_swizzle)
+            sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+      }
+
+      if(output_desc->nr_channels >= 4
+         && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+         )
+         sse_movups(p->func, dst, dataXMM);
+      else
+      {
+         if(output_desc->nr_channels >= 2
+            && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+            && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+            sse_movlps(p->func, dst, dataXMM);
+         else
+         {
+            if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+               sse_movss(p->func, dst, dataXMM);
+            else
+               x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+            if(output_desc->nr_channels >= 2)
+            {
+               if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+                  sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+               }
+               else
+                  x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+            }
+         }
+
+         if(output_desc->nr_channels >= 3)
+         {
+            if(output_desc->nr_channels >= 4
+               && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+               && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+               sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+            else
+            {
+               if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+                  sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+               }
+               else
+                  x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+               if(output_desc->nr_channels >= 4)
+               {
+                  if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+                  {
+                     sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+                     sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+                  }
+                  else
+                     x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+               }
+            }
+         }
+      }
+      return TRUE;
+   }
+   else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+           && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+           && (0
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+               || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+              ))
+   {
+      struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+      struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+      struct x86_reg tmp = p->tmp_EAX;
+      unsigned imms[2] = {0, 1};
+
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+            swizzle[i] = i;
+      }
+
+      for(i = 0; i < output_desc->nr_channels; ++i)
+      {
+         if(swizzle[i] < 4)
+            needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+         if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+            id_swizzle = FALSE;
+      }
+
+      if(needed_chans > 0)
+      {
+         emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+         switch(input_desc->channel[0].type)
+         {
+         case UTIL_FORMAT_TYPE_UNSIGNED:
+            if(input_desc->channel[0].normalized)
+            {
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+                  sse2_psrlw_imm(p->func, dataXMM, 1);
+            }
+            else
+               sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+            break;
+         case UTIL_FORMAT_TYPE_SIGNED:
+            if(input_desc->channel[0].normalized)
+            {
+               sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
+               sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+               sse2_psllw_imm(p->func, dataXMM, 9);
+               sse2_psrlw_imm(p->func, dataXMM, 8);
+               sse2_por(p->func, tmpXMM, dataXMM);
+               sse2_psrlw_imm(p->func, dataXMM, 7);
+               sse2_por(p->func, tmpXMM, dataXMM);
+               {
+                  struct x86_reg t = dataXMM;
+                  dataXMM = tmpXMM;
+                  tmpXMM = t;
+               }
+            }
+            else
+            {
+               sse2_punpcklbw(p->func, dataXMM, dataXMM);
+               sse2_psraw_imm(p->func, dataXMM, 8);
+            }
+            break;
+         default:
+            assert(0);
+         }
+
+         if(output_desc->channel[0].normalized)
+            imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7fff;
+
+         if(!id_swizzle)
+            sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+      }
+
+      if(output_desc->nr_channels >= 4
+         && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+         && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+         )
+         sse2_movq(p->func, dst, dataXMM);
+      else
+      {
+         if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+         {
+            if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+               sse2_movd(p->func, dst, dataXMM);
+            else
+            {
+               sse2_movd(p->func, tmp, dataXMM);
+               x86_mov16(p->func, dst, tmp);
+               if(output_desc->nr_channels >= 2)
+                  x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+            }
+         }
+         else
+         {
+            if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+               x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+            else
+            {
+               x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+               if(output_desc->nr_channels >= 2)
+               {
+                  sse2_movd(p->func, tmp, dataXMM);
+                  x86_shr_imm(p->func, tmp, 16);
+                  x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+               }
+            }
+         }
+
+         if(output_desc->nr_channels >= 3)
+         {
+            if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+            {
+               if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+               {
+                  sse2_psrlq_imm(p->func, dataXMM, 32);
+                  sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+               }
+               else
+               {
+                  sse2_psrlq_imm(p->func, dataXMM, 32);
+                  sse2_movd(p->func, tmp, dataXMM);
+                  x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+                  if(output_desc->nr_channels >= 4)
+                  {
+                     x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+                  }
+               }
+            }
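+            /* swizzle[2] names a constant channel (SWIZZLE_0/SWIZZLE_1):
+             * store it as an immediate from imms[], i.e. 0 for SWIZZLE_0
+             * and, for SWIZZLE_1, 1 for scaled outputs or 0xffff/0x7fff
+             * for normalized UNORM16/SNORM16 outputs.
+             */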
else + { + if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0) + x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); + else + { + x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); + + if(output_desc->nr_channels >= 4) + { + sse2_psrlq_imm(p->func, dataXMM, 48); + sse2_movd(p->func, tmp, dataXMM); + x86_mov16(p->func, x86_make_disp(dst, 6), tmp); + } + } + } + } + } + return TRUE; + } + else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0]))) + { + struct x86_reg tmp = p->tmp_EAX; + unsigned i; + if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4 + && swizzle[0] == UTIL_FORMAT_SWIZZLE_W + && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z + && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y + && swizzle[3] == UTIL_FORMAT_SWIZZLE_X) + { + /* TODO: support movbe */ + x86_mov(p->func, tmp, src); + x86_bswap(p->func, tmp); + x86_mov(p->func, dst, tmp); + return TRUE; + } + + for(i = 0; i < output_desc->nr_channels; ++i) + { + switch(output_desc->channel[0].size) + { + case 8: + if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) + { + unsigned v = 0; + if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) + { + switch(output_desc->channel[0].type) + { + case UTIL_FORMAT_TYPE_UNSIGNED: + v = output_desc->channel[0].normalized ? 0xff : 1; + break; + case UTIL_FORMAT_TYPE_SIGNED: + v = output_desc->channel[0].normalized ? 0x7f : 1; + break; + default: + return FALSE; + } + } + x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v); + } + else + { + x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1)); + x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp); + } + break; + case 16: + if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) + { + unsigned v = 0; + if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) + { + switch(output_desc->channel[1].type) + { + case UTIL_FORMAT_TYPE_UNSIGNED: + v = output_desc->channel[1].normalized ? 0xffff : 1; + break; + case UTIL_FORMAT_TYPE_SIGNED: + v = output_desc->channel[1].normalized ? 0x7fff : 1; + break; + case UTIL_FORMAT_TYPE_FLOAT: + v = 0x3c00; + break; + default: + return FALSE; + } + } + x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v); + } + else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0) + x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0); + else + { + x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2)); + x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp); + } + break; + case 32: + if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) + { + unsigned v = 0; + if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) + { + switch(output_desc->channel[1].type) + { + case UTIL_FORMAT_TYPE_UNSIGNED: + v = output_desc->channel[1].normalized ? 0xffffffff : 1; + break; + case UTIL_FORMAT_TYPE_SIGNED: + v = output_desc->channel[1].normalized ? 0x7fffffff : 1; + break; + case UTIL_FORMAT_TYPE_FLOAT: + v = 0x3f800000; + break; + default: + return FALSE; + } + } + x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v); + } + else + { + x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4)); + x86_mov(p->func, x86_make_disp(dst, i * 4), tmp); + } + break; + case 64: + if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) + { + unsigned l = 0; + unsigned h = 0; + if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) + { + switch(output_desc->channel[1].type) + { + case UTIL_FORMAT_TYPE_UNSIGNED: + h = output_desc->channel[1].normalized ? 0xffffffff : 0; + l = output_desc->channel[1].normalized ? 
0xffffffff : 1; + break; + case UTIL_FORMAT_TYPE_SIGNED: + h = output_desc->channel[1].normalized ? 0x7fffffff : 0; + l = output_desc->channel[1].normalized ? 0xffffffff : 1; + break; + case UTIL_FORMAT_TYPE_FLOAT: + h = 0x3ff00000; + l = 0; + break; + default: + return FALSE; + } + } + x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l); + x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h); + } + else + { + if(x86_target_caps(p->func) & X86_SSE) + { + struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0); + emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8)); + emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM); + } + else + { + x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8)); + x86_mov(p->func, x86_make_disp(dst, i * 8), tmp); + x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4)); + x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp); + } + } + break; + default: + return FALSE; + } + } + return TRUE; + } + /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */ + else if((x86_target_caps(p->func) & X86_SSE2) && + a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0 + || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM + || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM + )) + { + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + /* load */ + sse_movups(p->func, dataXMM, src); + + if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM) + sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3)); + + /* scale by 255.0 */ + sse_mulps(p->func, dataXMM, get_const(p, CONST_255)); + + /* pack and emit */ + sse2_cvtps2dq(p->func, dataXMM, dataXMM); + sse2_packssdw(p->func, dataXMM, dataXMM); + sse2_packuswb(p->func, dataXMM, dataXMM); + sse2_movd(p->func, dst, dataXMM); + + return TRUE; + } + + return FALSE; +} + +static boolean translate_attr( struct translate_sse *p, + const struct translate_element *a, + struct x86_reg src, + struct x86_reg dst) +{ + if(a->input_format == a->output_format) + { + emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1)); + return TRUE; + } + + return translate_attr_convert(p, a, src, dst); +} + +static boolean init_inputs( struct translate_sse *p, + unsigned index_size ) +{ + unsigned i; + struct x86_reg instance_id = x86_make_disp(p->machine_EDI, + get_offset(p, &p->instance_id)); + struct x86_reg start_instance = x86_make_disp(p->machine_EDI, + get_offset(p, &p->start_instance)); + + for (i = 0; i < p->nr_buffer_variants; i++) { + struct translate_buffer_variant *variant = &p->buffer_variant[i]; + struct translate_buffer *buffer = &p->buffer[variant->buffer_index]; + + if (!index_size || variant->instance_divisor) { + struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI, + get_offset(p, &buffer->max_index)); + struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, + get_offset(p, &buffer->stride)); + struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, + get_offset(p, &variant->ptr)); + struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI, + get_offset(p, &buffer->base_ptr)); + struct x86_reg elt = p->idx_ESI; + struct x86_reg tmp_EAX = p->tmp_EAX; + + /* Calculate pointer to first attrib: + * base_ptr + stride * index, where index depends on instance divisor + */ + if (variant->instance_divisor) { + /* Start with instance = instance_id + * which is true if divisor is 1. 
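+          * With a larger divisor the generated code computes
+          * (instance_id - start_instance) / divisor + start_instance
+          * with x86_div, i.e. EAX = EDX:EAX / ECX.  For example (numbers
+          * assumed): divisor 4, start_instance 5 and instance_id 13 give
+          * 8 / 4 + 5 = 7, so instances 13..16 all read element 7.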
+ */ + x86_mov(p->func, tmp_EAX, instance_id); + + if (variant->instance_divisor != 1) { + struct x86_reg tmp_EDX = p->tmp2_EDX; + struct x86_reg tmp_ECX = p->src_ECX; + + /* instance_num = instance_id - start_instance */ + x86_mov(p->func, tmp_EDX, start_instance); + x86_sub(p->func, tmp_EAX, tmp_EDX); + + /* TODO: Add x86_shr() to rtasm and use it whenever + * instance divisor is power of two. + */ + x86_xor(p->func, tmp_EDX, tmp_EDX); + x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor); + x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ + + /* instance = (instance_id - start_instance) / divisor + + * start_instance + */ + x86_mov(p->func, tmp_EDX, start_instance); + x86_add(p->func, tmp_EAX, tmp_EDX); + } + + /* XXX we need to clamp the index here too, but to a + * per-array max value, not the draw->pt.max_index value + * that's being given to us via translate->set_buffer(). + */ + } else { + x86_mov(p->func, tmp_EAX, elt); + + /* Clamp to max_index + */ + x86_cmp(p->func, tmp_EAX, buf_max_index); + x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE); + } + + x86_imul(p->func, tmp_EAX, buf_stride); + x64_rexw(p->func); + x86_add(p->func, tmp_EAX, buf_base_ptr); + + x86_cmp(p->func, p->count_EBP, p->tmp_EAX); + + /* In the linear case, keep the buffer pointer instead of the + * index number. + */ + if (!index_size && p->nr_buffer_variants == 1) + { + x64_rexw(p->func); + x86_mov(p->func, elt, tmp_EAX); + } + else + { + x64_rexw(p->func); + x86_mov(p->func, buf_ptr, tmp_EAX); + } + } + } + + return TRUE; +} + + +static struct x86_reg get_buffer_ptr( struct translate_sse *p, + unsigned index_size, + unsigned var_idx, + struct x86_reg elt ) +{ + if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { + return x86_make_disp(p->machine_EDI, + get_offset(p, &p->instance_id)); + } + if (!index_size && p->nr_buffer_variants == 1) { + return p->idx_ESI; + } + else if (!index_size || p->buffer_variant[var_idx].instance_divisor) { + struct x86_reg ptr = p->src_ECX; + struct x86_reg buf_ptr = + x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer_variant[var_idx].ptr)); + + x64_rexw(p->func); + x86_mov(p->func, ptr, buf_ptr); + return ptr; + } + else { + struct x86_reg ptr = p->src_ECX; + const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx]; + + struct x86_reg buf_stride = + x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[variant->buffer_index].stride)); + + struct x86_reg buf_base_ptr = + x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[variant->buffer_index].base_ptr)); + + struct x86_reg buf_max_index = + x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[variant->buffer_index].max_index)); + + + + /* Calculate pointer to current attrib: + */ + switch(index_size) + { + case 1: + x86_movzx8(p->func, ptr, elt); + break; + case 2: + x86_movzx16(p->func, ptr, elt); + break; + case 4: + x86_mov(p->func, ptr, elt); + break; + } + + /* Clamp to max_index + */ + x86_cmp(p->func, ptr, buf_max_index); + x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); + + x86_imul(p->func, ptr, buf_stride); + x64_rexw(p->func); + x86_add(p->func, ptr, buf_base_ptr); + return ptr; + } +} + + + +static boolean incr_inputs( struct translate_sse *p, + unsigned index_size ) +{ + if (!index_size && p->nr_buffer_variants == 1) { + struct x86_reg stride = x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[0].stride)); + + if (p->buffer_variant[0].instance_divisor == 0) { + x64_rexw(p->func); + x86_add(p->func, p->idx_ESI, stride); + sse_prefetchnta(p->func, 
x86_make_disp(p->idx_ESI, 192)); + } + } + else if (!index_size) { + unsigned i; + + /* Is this worthwhile?? + */ + for (i = 0; i < p->nr_buffer_variants; i++) { + struct translate_buffer_variant *variant = &p->buffer_variant[i]; + struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI, + get_offset(p, &variant->ptr)); + struct x86_reg buf_stride = x86_make_disp(p->machine_EDI, + get_offset(p, &p->buffer[variant->buffer_index].stride)); + + if (variant->instance_divisor == 0) { + x86_mov(p->func, p->tmp_EAX, buf_stride); + x64_rexw(p->func); + x86_add(p->func, p->tmp_EAX, buf_ptr); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x64_rexw(p->func); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } + } + } + else { + x64_rexw(p->func); + x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size)); + } + + return TRUE; +} + + +/* Build run( struct translate *machine, + * unsigned start, + * unsigned count, + * void *output_buffer ) + * or + * run_elts( struct translate *machine, + * unsigned *elts, + * unsigned count, + * void *output_buffer ) + * + * Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static boolean build_vertex_emit( struct translate_sse *p, + struct x86_function *func, + unsigned index_size ) +{ + int fixup, label; + unsigned j; + + memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const)); + memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg)); + + p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); + p->idx_ESI = x86_make_reg(file_REG32, reg_SI); + p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX); + p->machine_EDI = x86_make_reg(file_REG32, reg_DI); + p->count_EBP = x86_make_reg(file_REG32, reg_BP); + p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX); + p->src_ECX = x86_make_reg(file_REG32, reg_CX); + + p->func = func; + + x86_init_func(p->func); + + if(x86_target(p->func) == X86_64_WIN64_ABI) + { + /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */ + sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6)); + sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7)); + } + + x86_push(p->func, p->outbuf_EBX); + x86_push(p->func, p->count_EBP); + +/* on non-Win64 x86-64, these are already in the right registers */ + if(x86_target(p->func) != X86_64_STD_ABI) + { + x86_push(p->func, p->machine_EDI); + x86_push(p->func, p->idx_ESI); + + x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1)); + x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2)); + } + + x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3)); + + if(x86_target(p->func) != X86_32) + x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); + else + x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6)); + + /* Load instance ID. 
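+    * Arguments 4 and 5 of the generated function are start_instance and
+    * instance_id; they are stored into the machine struct so the
+    * per-vertex code can reach them as displacements off machine_EDI
+    * (see get_offset() above).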
+
+
+/* Build run( struct translate *machine,
+ *            unsigned start,
+ *            unsigned count,
+ *            unsigned start_instance,
+ *            unsigned instance_id,
+ *            void *output_buffer )
+ * or
+ *       run_elts( struct translate *machine,
+ *                 unsigned *elts,
+ *                 unsigned count,
+ *                 unsigned start_instance,
+ *                 unsigned instance_id,
+ *                 void *output_buffer )
+ *
+ * Lots of hardcoding
+ *
+ * EAX -- pointer to current output vertex
+ * ECX -- pointer to current attribute
+ */
+static boolean build_vertex_emit( struct translate_sse *p,
+                                  struct x86_function *func,
+                                  unsigned index_size )
+{
+   int fixup, label;
+   unsigned j;
+
+   memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+   memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
+
+   p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
+   p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
+   p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
+   p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
+   p->count_EBP = x86_make_reg(file_REG32, reg_BP);
+   p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
+   p->src_ECX = x86_make_reg(file_REG32, reg_CX);
+
+   p->func = func;
+
+   x86_init_func(p->func);
+
+   if (x86_target(p->func) == X86_64_WIN64_ABI) {
+      /* The ABI guarantees a 16-byte aligned 32-byte "shadow space"
+       * above the return address; save callee-saved XMM6/XMM7 there. */
+      sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+      sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+   }
+
+   x86_push(p->func, p->outbuf_EBX);
+   x86_push(p->func, p->count_EBP);
+
+   /* on non-Win64 x86-64, these are already in the right registers */
+   if (x86_target(p->func) != X86_64_STD_ABI) {
+      x86_push(p->func, p->machine_EDI);
+      x86_push(p->func, p->idx_ESI);
+
+      x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+      x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+   }
+
+   x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+   if (x86_target(p->func) != X86_32)
+      x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
+   else
+      x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
+
+   /* Load start_instance and instance ID.
+    */
+   if (p->use_instancing) {
+      x86_mov(p->func,
+              p->tmp2_EDX,
+              x86_fn_arg(p->func, 4));
+      x86_mov(p->func,
+              x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)),
+              p->tmp2_EDX);
+
+      x86_mov(p->func,
+              p->tmp_EAX,
+              x86_fn_arg(p->func, 5));
+      x86_mov(p->func,
+              x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
+              p->tmp_EAX);
+   }
+
+   /* Get vertex count, compare to zero
+    */
+   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
+   x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
+   fixup = x86_jcc_forward(p->func, cc_E);
+
+   /* always load, needed or not:
+    */
+   init_inputs(p, index_size);
+
+   /* Note address for loop jump
+    */
+   label = x86_get_label(p->func);
+   {
+      struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
+      int last_variant = -1;
+      struct x86_reg vb;
+
+      for (j = 0; j < p->translate.key.nr_elements; j++) {
+         const struct translate_element *a = &p->translate.key.element[j];
+         unsigned variant = p->element_to_buffer_variant[j];
+
+         /* Figure out source pointer address:
+          */
+         if (variant != last_variant) {
+            last_variant = variant;
+            vb = get_buffer_ptr(p, index_size, variant, elt);
+         }
+
+         if (!translate_attr( p, a,
+                              x86_make_disp(vb, a->input_offset),
+                              x86_make_disp(p->outbuf_EBX, a->output_offset)))
+            return FALSE;
+      }
+
+      /* Next output vertex:
+       */
+      x64_rexw(p->func);
+      x86_lea(p->func,
+              p->outbuf_EBX,
+              x86_make_disp(p->outbuf_EBX,
+                            p->translate.key.output_stride));
+
+      /* Incr index
+       */
+      incr_inputs( p, index_size );
+   }
+
+   /* decr count, loop if not zero
+    */
+   x86_dec(p->func, p->count_EBP);
+   x86_jcc(p->func, cc_NZ, label);
+
+   /* Exit mmx state?
+    */
+   if (p->func->need_emms)
+      mmx_emms(p->func);
+
+   /* Land forward jump here:
+    */
+   x86_fixup_fwd_jump(p->func, fixup);
+
+   /* Pop regs and return
+    */
+   if (x86_target(p->func) != X86_64_STD_ABI) {
+      x86_pop(p->func, p->idx_ESI);
+      x86_pop(p->func, p->machine_EDI);
+   }
+
+   x86_pop(p->func, p->count_EBP);
+   x86_pop(p->func, p->outbuf_EBX);
+
+   if (x86_target(p->func) == X86_64_WIN64_ABI) {
+      sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+      sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+   }
+   x86_ret(p->func);
+
+   return TRUE;
+}
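/*
 * The generated function is easier to follow as the loop it encodes.
 * A rough C rendering of the emitted control flow; get_buffer_ptr_c()
 * and translate_attr_c() are hypothetical stand-ins for the code
 * produced by get_buffer_ptr() and translate_attr().
 */
static boolean
run_c(struct translate_sse *p, unsigned index_size,
      const char *elts, unsigned count, char *out)
{
   if (count == 0)
      return TRUE;                      /* the cc_E forward jump */

   do {                                 /* label ... dec/jcc(cc_NZ) */
      int last_variant = -1;
      const char *vb = NULL;
      unsigned j;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned variant = p->element_to_buffer_variant[j];

         /* Re-derive the source pointer only when the variant changes. */
         if ((int)variant != last_variant) {
            last_variant = (int)variant;
            vb = get_buffer_ptr_c(p, index_size, variant, elts);
         }

         translate_attr_c(p, a, vb + a->input_offset,
                          out + a->output_offset);
      }

      out += p->translate.key.output_stride;   /* next output vertex */
      elts += index_size;                      /* incr_inputs() */
   } while (--count);

   return TRUE;
}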
+
+
+static void translate_sse_set_buffer( struct translate *translate,
+                                      unsigned buf,
+                                      const void *ptr,
+                                      unsigned stride,
+                                      unsigned max_index )
+{
+   struct translate_sse *p = (struct translate_sse *)translate;
+
+   if (buf < p->nr_buffers) {
+      p->buffer[buf].base_ptr = (char *)ptr;
+      p->buffer[buf].stride = stride;
+      p->buffer[buf].max_index = max_index;
+   }
+
+   if (0) debug_printf("%s %d/%d: %p %d\n",
+                       __FUNCTION__, buf,
+                       p->nr_buffers,
+                       ptr, stride);
+}
+
+
+static void translate_sse_release( struct translate *translate )
+{
+   struct translate_sse *p = (struct translate_sse *)translate;
+
+   x86_release_func( &p->elt8_func );
+   x86_release_func( &p->elt16_func );
+   x86_release_func( &p->elt_func );
+   x86_release_func( &p->linear_func );
+
+   os_free_aligned(p);
+}
+
+
+struct translate *translate_sse2_create( const struct translate_key *key )
+{
+   struct translate_sse *p = NULL;
+   unsigned i;
+
+   /* this check is misnamed: it actually tests whether rtasm is
+    * enabled, not just whether SSE is present */
+   if (!rtasm_cpu_has_sse())
+      goto fail;
+
+   p = os_malloc_aligned(sizeof(struct translate_sse), 16);
+   if (p == NULL)
+      goto fail;
+   memset(p, 0, sizeof(*p));
+   memcpy(p->consts, consts, sizeof(consts));
+
+   p->translate.key = *key;
+   p->translate.release = translate_sse_release;
+   p->translate.set_buffer = translate_sse_set_buffer;
+
+   for (i = 0; i < key->nr_elements; i++) {
+      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
+         unsigned j;
+
+         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
+
+         if (key->element[i].instance_divisor) {
+            p->use_instancing = TRUE;
+         }
+
+         /*
+          * Map the vertex element to a vertex buffer variant, reusing
+          * an existing (buffer, divisor) pair when one matches.
+          */
+         for (j = 0; j < p->nr_buffer_variants; j++) {
+            if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer &&
+                p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) {
+               break;
+            }
+         }
+         if (j == p->nr_buffer_variants) {
+            p->buffer_variant[j].buffer_index = key->element[i].input_buffer;
+            p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor;
+            p->nr_buffer_variants++;
+         }
+         p->element_to_buffer_variant[i] = j;
+      } else {
+         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
+
+         p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID;
+      }
+   }
+
+   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
+
+   if (!build_vertex_emit(p, &p->linear_func, 0))
+      goto fail;
+
+   if (!build_vertex_emit(p, &p->elt_func, 4))
+      goto fail;
+
+   if (!build_vertex_emit(p, &p->elt16_func, 2))
+      goto fail;
+
+   if (!build_vertex_emit(p, &p->elt8_func, 1))
+      goto fail;
+
+   p->translate.run = (run_func) x86_get_func(&p->linear_func);
+   if (p->translate.run == NULL)
+      goto fail;
+
+   p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func);
+   if (p->translate.run_elts == NULL)
+      goto fail;
+
+   p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func);
+   if (p->translate.run_elts16 == NULL)
+      goto fail;
+
+   p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func);
+   if (p->translate.run_elts8 == NULL)
+      goto fail;
+
+   return &p->translate;
+
+ fail:
+   if (p)
+      translate_sse_release( &p->translate );
+
+   return NULL;
+}
+
+
+#else
+
+struct translate *translate_sse2_create( const struct translate_key *key )
+{
+   return NULL;
+}
+
+#endif
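/*
 * Callers drive the object returned above through the translate vtable.
 * A hypothetical usage sketch: verts, vertex_stride, nr_verts and out
 * are illustrative, and the element's input/output formats are elided
 * since they depend on the fetch/emit support compiled in.
 */
static void
convert_vertices(const void *verts, unsigned vertex_stride,
                 unsigned nr_verts, void *out)
{
   struct translate_key key;
   struct translate *t;

   memset(&key, 0, sizeof(key));
   key.output_stride = 16;
   key.nr_elements = 1;
   key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
   key.element[0].input_buffer = 0;
   key.element[0].input_offset = 0;
   key.element[0].instance_divisor = 0;
   key.element[0].output_offset = 0;
   /* key.element[0].input_format / output_format elided */

   t = translate_sse2_create(&key);
   if (!t)
      return;   /* no rtasm/SSE: fall back to a generic translate path */

   t->set_buffer(t, 0, verts, vertex_stride, nr_verts - 1);
   t->run(t, 0 /* start */, nr_verts /* count */,
          0 /* start_instance */, 0 /* instance_id */, out);
   t->release(t);
}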
diff --git a/drivers/video/Gallium/include/pipe/p_config.h b/drivers/video/Gallium/include/pipe/p_config.h
index 6b51160af6..60f3205fc2 100644
--- a/drivers/video/Gallium/include/pipe/p_config.h
+++ b/drivers/video/Gallium/include/pipe/p_config.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,24 +22,24 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
 **************************************************************************/

 /**
  * @file
  * Gallium configuration defines.
- * 
- * This header file sets several defines based on the compiler, processor 
- * architecture, and operating system being used. These defines should be used 
- * throughout the code to facilitate porting to new platforms. It is likely that 
- * this file is auto-generated by an autoconf-like tool at some point, as some 
- * things cannot be determined by pre-defined environment alone. 
- * 
+ *
+ * This header file sets several defines based on the compiler, processor
+ * architecture, and operating system being used. These defines should be used
+ * throughout the code to facilitate porting to new platforms. It is likely that
+ * this file is auto-generated by an autoconf-like tool at some point, as some
+ * things cannot be determined by pre-defined environment alone.
+ *
  * See also:
  * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
  * - echo | gcc -dM -E - | sort
  * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx
- * 
+ *
  * @author José Fonseca
  */
@@ -61,8 +61,8 @@
  * - 1400: Visual C++ 2005
  * - 1310: Visual C++ .NET 2003
  * - 1300: Visual C++ .NET 2002
- * 
- * __MSC__ seems to be an old macro -- it is not pre-defined on recent MSVC 
+ *
+ * __MSC__ seems to be an old macro -- it is not pre-defined on recent MSVC
  * versions.
  */
 #if defined(_MSC_VER) || defined(__MSC__)
@@ -162,7 +162,7 @@

 /*
  * Auto-detect the operating system family.
- * 
+ *
  * See subsystem below for a more fine-grained distinction.
  */

@@ -212,10 +212,6 @@
 #define PIPE_OS_UNIX
 #endif

-#if defined(_WIN32) || defined(WIN32)
-#define PIPE_OS_WINDOWS
-#endif
-
 #if defined(__HAIKU__)
 #define PIPE_OS_HAIKU
 #define PIPE_OS_UNIX
@@ -228,7 +224,7 @@

 /*
  * Try to auto-detect the subsystem.
- * 
+ *
  * NOTE: There is no way to auto-detect most of these.
  */
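/*
 * The detection macros above are meant to be consumed, not re-derived,
 * by platform-specific code.  A hypothetical consumer, assuming this
 * tree no longer defines PIPE_OS_WINDOWS (the block removed above):
 */
#include "pipe/p_config.h"

#if defined(PIPE_OS_HAIKU)
/* Haiku also defines PIPE_OS_UNIX, so test the more specific macro first. */
#include <OS.h>
#elif defined(PIPE_OS_UNIX)
#include <unistd.h>
#else
#error "unsupported platform"
#endif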