diff --git a/programs/develop/libraries/newlib/sdk/fasm/include/pixman-1.inc b/programs/develop/libraries/newlib/sdk/fasm/include/pixman-1.inc new file mode 100644 index 0000000000..a59c2b2679 --- /dev/null +++ b/programs/develop/libraries/newlib/sdk/fasm/include/pixman-1.inc @@ -0,0 +1,148 @@ +import pixman-1,\ + _pixman_internal_only_get_implementation,'_pixman_internal_only_get_implementation',\ + pixman_add_trapezoids,'pixman_add_trapezoids',\ + pixman_add_traps,'pixman_add_traps',\ + pixman_add_triangles,'pixman_add_triangles',\ + pixman_blt,'pixman_blt',\ + pixman_composite_glyphs,'pixman_composite_glyphs',\ + pixman_composite_glyphs_no_mask,'pixman_composite_glyphs_no_mask',\ + pixman_composite_trapezoids,'pixman_composite_trapezoids',\ + pixman_composite_triangles,'pixman_composite_triangles',\ + pixman_compute_composite_region,'pixman_compute_composite_region',\ + pixman_disable_out_of_bounds_workaround,'pixman_disable_out_of_bounds_workaround',\ + pixman_edge_init,'pixman_edge_init',\ + pixman_edge_step,'pixman_edge_step',\ + pixman_f_transform_bounds,'pixman_f_transform_bounds',\ + pixman_f_transform_from_pixman_transform,'pixman_f_transform_from_pixman_transform',\ + pixman_f_transform_init_identity,'pixman_f_transform_init_identity',\ + pixman_f_transform_init_rotate,'pixman_f_transform_init_rotate',\ + pixman_f_transform_init_scale,'pixman_f_transform_init_scale',\ + pixman_f_transform_init_translate,'pixman_f_transform_init_translate',\ + pixman_f_transform_invert,'pixman_f_transform_invert',\ + pixman_f_transform_multiply,'pixman_f_transform_multiply',\ + pixman_f_transform_point,'pixman_f_transform_point',\ + pixman_f_transform_point_3d,'pixman_f_transform_point_3d',\ + pixman_f_transform_rotate,'pixman_f_transform_rotate',\ + pixman_f_transform_scale,'pixman_f_transform_scale',\ + pixman_f_transform_translate,'pixman_f_transform_translate',\ + pixman_fill,'pixman_fill',\ + pixman_filter_create_separable_convolution,'pixman_filter_create_separable_convolution',\ + pixman_format_supported_destination,'pixman_format_supported_destination',\ + pixman_format_supported_source,'pixman_format_supported_source',\ + pixman_glyph_cache_create,'pixman_glyph_cache_create',\ + pixman_glyph_cache_destroy,'pixman_glyph_cache_destroy',\ + pixman_glyph_cache_freeze,'pixman_glyph_cache_freeze',\ + pixman_glyph_cache_insert,'pixman_glyph_cache_insert',\ + pixman_glyph_cache_lookup,'pixman_glyph_cache_lookup',\ + pixman_glyph_cache_remove,'pixman_glyph_cache_remove',\ + pixman_glyph_cache_thaw,'pixman_glyph_cache_thaw',\ + pixman_glyph_get_extents,'pixman_glyph_get_extents',\ + pixman_glyph_get_mask_format,'pixman_glyph_get_mask_format',\ + pixman_image_composite,'pixman_image_composite',\ + pixman_image_composite32,'pixman_image_composite32',\ + pixman_image_create_bits,'pixman_image_create_bits',\ + pixman_image_create_bits_no_clear,'pixman_image_create_bits_no_clear',\ + pixman_image_create_conical_gradient,'pixman_image_create_conical_gradient',\ + pixman_image_create_linear_gradient,'pixman_image_create_linear_gradient',\ + pixman_image_create_radial_gradient,'pixman_image_create_radial_gradient',\ + pixman_image_create_solid_fill,'pixman_image_create_solid_fill',\ + pixman_image_fill_boxes,'pixman_image_fill_boxes',\ + pixman_image_fill_rectangles,'pixman_image_fill_rectangles',\ + pixman_image_get_component_alpha,'pixman_image_get_component_alpha',\ + pixman_image_get_data,'pixman_image_get_data',\ + pixman_image_get_depth,'pixman_image_get_depth',\ + 
pixman_image_get_destroy_data,'pixman_image_get_destroy_data',\ + pixman_image_get_format,'pixman_image_get_format',\ + pixman_image_get_height,'pixman_image_get_height',\ + pixman_image_get_stride,'pixman_image_get_stride',\ + pixman_image_get_width,'pixman_image_get_width',\ + pixman_image_ref,'pixman_image_ref',\ + pixman_image_set_accessors,'pixman_image_set_accessors',\ + pixman_image_set_alpha_map,'pixman_image_set_alpha_map',\ + pixman_image_set_clip_region,'pixman_image_set_clip_region',\ + pixman_image_set_clip_region32,'pixman_image_set_clip_region32',\ + pixman_image_set_component_alpha,'pixman_image_set_component_alpha',\ + pixman_image_set_destroy_function,'pixman_image_set_destroy_function',\ + pixman_image_set_filter,'pixman_image_set_filter',\ + pixman_image_set_has_client_clip,'pixman_image_set_has_client_clip',\ + pixman_image_set_indexed,'pixman_image_set_indexed',\ + pixman_image_set_repeat,'pixman_image_set_repeat',\ + pixman_image_set_source_clipping,'pixman_image_set_source_clipping',\ + pixman_image_set_transform,'pixman_image_set_transform',\ + pixman_image_unref,'pixman_image_unref',\ + pixman_line_fixed_edge_init,'pixman_line_fixed_edge_init',\ + pixman_rasterize_edges,'pixman_rasterize_edges',\ + pixman_rasterize_trapezoid,'pixman_rasterize_trapezoid',\ + pixman_region32_clear,'pixman_region32_clear',\ + pixman_region32_contains_point,'pixman_region32_contains_point',\ + pixman_region32_contains_rectangle,'pixman_region32_contains_rectangle',\ + pixman_region32_copy,'pixman_region32_copy',\ + pixman_region32_equal,'pixman_region32_equal',\ + pixman_region32_extents,'pixman_region32_extents',\ + pixman_region32_fini,'pixman_region32_fini',\ + pixman_region32_init,'pixman_region32_init',\ + pixman_region32_init_from_image,'pixman_region32_init_from_image',\ + pixman_region32_init_rect,'pixman_region32_init_rect',\ + pixman_region32_init_rects,'pixman_region32_init_rects',\ + pixman_region32_init_with_extents,'pixman_region32_init_with_extents',\ + pixman_region32_intersect,'pixman_region32_intersect',\ + pixman_region32_intersect_rect,'pixman_region32_intersect_rect',\ + pixman_region32_inverse,'pixman_region32_inverse',\ + pixman_region32_n_rects,'pixman_region32_n_rects',\ + pixman_region32_not_empty,'pixman_region32_not_empty',\ + pixman_region32_rectangles,'pixman_region32_rectangles',\ + pixman_region32_reset,'pixman_region32_reset',\ + pixman_region32_selfcheck,'pixman_region32_selfcheck',\ + pixman_region32_subtract,'pixman_region32_subtract',\ + pixman_region32_translate,'pixman_region32_translate',\ + pixman_region32_union,'pixman_region32_union',\ + pixman_region32_union_rect,'pixman_region32_union_rect',\ + pixman_region_clear,'pixman_region_clear',\ + pixman_region_contains_point,'pixman_region_contains_point',\ + pixman_region_contains_rectangle,'pixman_region_contains_rectangle',\ + pixman_region_copy,'pixman_region_copy',\ + pixman_region_equal,'pixman_region_equal',\ + pixman_region_extents,'pixman_region_extents',\ + pixman_region_fini,'pixman_region_fini',\ + pixman_region_init,'pixman_region_init',\ + pixman_region_init_from_image,'pixman_region_init_from_image',\ + pixman_region_init_rect,'pixman_region_init_rect',\ + pixman_region_init_rects,'pixman_region_init_rects',\ + pixman_region_init_with_extents,'pixman_region_init_with_extents',\ + pixman_region_intersect,'pixman_region_intersect',\ + pixman_region_intersect_rect,'pixman_region_intersect_rect',\ + pixman_region_inverse,'pixman_region_inverse',\ + 
pixman_region_n_rects,'pixman_region_n_rects',\ + pixman_region_not_empty,'pixman_region_not_empty',\ + pixman_region_rectangles,'pixman_region_rectangles',\ + pixman_region_reset,'pixman_region_reset',\ + pixman_region_selfcheck,'pixman_region_selfcheck',\ + pixman_region_set_static_pointers,'pixman_region_set_static_pointers',\ + pixman_region_subtract,'pixman_region_subtract',\ + pixman_region_translate,'pixman_region_translate',\ + pixman_region_union,'pixman_region_union',\ + pixman_region_union_rect,'pixman_region_union_rect',\ + pixman_sample_ceil_y,'pixman_sample_ceil_y',\ + pixman_sample_floor_y,'pixman_sample_floor_y',\ + pixman_transform_bounds,'pixman_transform_bounds',\ + pixman_transform_from_pixman_f_transform,'pixman_transform_from_pixman_f_transform',\ + pixman_transform_init_identity,'pixman_transform_init_identity',\ + pixman_transform_init_rotate,'pixman_transform_init_rotate',\ + pixman_transform_init_scale,'pixman_transform_init_scale',\ + pixman_transform_init_translate,'pixman_transform_init_translate',\ + pixman_transform_invert,'pixman_transform_invert',\ + pixman_transform_is_identity,'pixman_transform_is_identity',\ + pixman_transform_is_int_translate,'pixman_transform_is_int_translate',\ + pixman_transform_is_inverse,'pixman_transform_is_inverse',\ + pixman_transform_is_scale,'pixman_transform_is_scale',\ + pixman_transform_multiply,'pixman_transform_multiply',\ + pixman_transform_point,'pixman_transform_point',\ + pixman_transform_point_31_16,'pixman_transform_point_31_16',\ + pixman_transform_point_31_16_3d,'pixman_transform_point_31_16_3d',\ + pixman_transform_point_31_16_affine,'pixman_transform_point_31_16_affine',\ + pixman_transform_point_3d,'pixman_transform_point_3d',\ + pixman_transform_rotate,'pixman_transform_rotate',\ + pixman_transform_scale,'pixman_transform_scale',\ + pixman_transform_translate,'pixman_transform_translate',\ + pixman_version,'pixman_version',\ + pixman_version_string,'pixman_version_string' diff --git a/programs/develop/libraries/pixman/COPYING b/programs/develop/libraries/pixman/COPYING new file mode 100644 index 0000000000..6168dea56f --- /dev/null +++ b/programs/develop/libraries/pixman/COPYING @@ -0,0 +1,42 @@ +The following is the MIT license, agreed upon by most contributors. +Copyright holders of new code should use this license statement where +possible. They may also add themselves to the list below. + +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * Copyright 1987, 1988, 1989 Digital Equipment Corporation + * Copyright 1999, 2004, 2008 Keith Packard + * Copyright 2000 SuSE, Inc. + * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. + * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright 2004 Nicholas Miell + * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech + * Copyright 2005 Trolltech AS + * Copyright 2007 Luca Barbato + * Copyright 2008 Aaron Plattner, NVIDIA Corporation + * Copyright 2008 Rodrigo Kumpera + * Copyright 2008 André Tupinambá + * Copyright 2008 Mozilla Corporation + * Copyright 2008 Frederic Plourde + * Copyright 2009, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright 2009, 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ diff --git a/programs/develop/libraries/pixman/Makefile b/programs/develop/libraries/pixman/Makefile index 36501a32f0..c063741e92 100644 --- a/programs/develop/libraries/pixman/Makefile +++ b/programs/develop/libraries/pixman/Makefile @@ -2,45 +2,52 @@ LIBRARY = pixman-1 CC = gcc +CFLAGS = -U_Win32 -U_WIN32 -U__MINGW32__ -c -O2 -Wall -Winline -fomit-frame-pointer -CFLAGS = -c -O2 -mmmx -Winline -fomit-frame-pointer +LD = ld +LDFLAGS = -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0 --out-implib $(LIBRARY).dll.a -LDIMPORT:= -nostdlib --out-implib libpiximp.a --exclude-libs libamz.a -LDFLAGS:= -shared -s -T ../newlib/dll.lds --image-base 0 +STRIP = $(PREFIX)strip -DEFINES = -DHAVE_CONFIG_H -DPIXMAN_NO_TLS -DUSE_MMX - -INCLUDES = -I../pixman -I../newlib/include +INCLUDES= -I. 
-I../newlib/include LIBPATH:= -L../newlib -LIBS:= -lamz -lgcc -lcimp +LIBS:= -ldll -lc.dll -lgcc + +DEFINES = -DHAVE_CONFIG_H + SOURCES = \ - pixman-image.c \ + pixman.c \ pixman-access.c \ pixman-access-accessors.c \ - pixman-region16.c \ - pixman-region32.c \ + pixman-bits-image.c \ pixman-combine32.c \ - pixman-combine64.c \ - pixman-utils.c \ + pixman-combine-float.c \ + pixman-conical-gradient.c \ pixman-edge.c \ pixman-edge-accessors.c \ - pixman-trap.c \ - pixman-timer.c \ - pixman-matrix.c \ - pixman-gradient-walker.c \ - pixman-linear-gradient.c \ - pixman-radial-gradient.c \ - pixman-bits-image.c \ - pixman.c \ - pixman-cpu.c \ pixman-fast-path.c \ - pixman-implementation.c \ - pixman-solid-fill.c \ + pixman-filter.c \ pixman-general.c \ + pixman-glyph.c \ + pixman-gradient-walker.c \ + pixman-image.c \ + pixman-implementation.c \ + pixman-linear-gradient.c \ + pixman-matrix.c \ + pixman-noop.c \ + pixman-radial-gradient.c \ + pixman-region16.c \ + pixman-region32.c \ + pixman-solid-fill.c \ + pixman-timer.c \ + pixman-trap.c \ + pixman-utils.c \ + pixman-x86.c \ pixman-mmx.c \ + pixman-sse2.c \ $(NULL) OBJECTS = $(patsubst %.c, %.o, $(SOURCES)) @@ -53,13 +60,21 @@ all:$(LIBRARY).a $(LIBRARY).dll $(LIBRARY).a: $(OBJECTS) Makefile ar cvrs $(LIBRARY).a $(OBJECTS) -$(LIBRARY).dll: $(OBJECTS) Makefile - ld $(LDFLAGS) $(LDIMPORT) $(LIBPATH) -o $@ $(OBJECTS) $(LIBS) +$(LIBRARY).dll: $(LIBRARY).def $(OBJECTS) Makefile + $(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(LIBRARY).def $(OBJECTS) $(LIBS) + $(STRIP) $@ + sed -f ../newlib/cmd1.sed $(LIBRARY).def > mem + sed -f ../newlib/cmd2.sed mem >$(LIBRARY).inc - -%.o: %.c $(SOURCES) Makefile +%.o : %.c Makefile $(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $< +pixman-mmx.o: pixman-mmx.c Makefile + $(CC) $(CFLAGS) -mmmx $(DEFINES) $(INCLUDES) -o $@ $< + +pixman-sse2.o: pixman-sse2.c Makefile + $(CC) $(CFLAGS) -msse2 $(DEFINES) $(INCLUDES) -o $@ $< + clean: -rm -f *.o diff --git a/programs/develop/libraries/pixman/README b/programs/develop/libraries/pixman/README index 3cfbc5053e..6d8cfd8ad5 100644 --- a/programs/develop/libraries/pixman/README +++ b/programs/develop/libraries/pixman/README @@ -1,22 +1,116 @@ -pixman is a library that provides low-level pixel manipulation +Pixman is a library that provides low-level pixel manipulation features such as image compositing and trapezoid rasterization. -All questions regarding this software should be directed to the pixman +Questions, bug reports and patches should be directed to the pixman mailing list: http://lists.freedesktop.org/mailman/listinfo/pixman -Please send patches and bug reports either to the mailing list above, -or file them at the freedesktop bug tracker: +You can also file bugs at https://bugs.freedesktop.org/enter_bug.cgi?product=pixman -The master development code repository can be found at: +For real-time discussions about pixman, feel free to join the IRC +channels #cairo and #xorg-devel on the FreeNode IRC network. + + +Contributing +------------ + +In order to contribute to pixman, you will need a working knowledge of +the git version control system. For a quick getting started guide, +there is the "Everyday Git With 20 Commands Or So" guide + + http://www.kernel.org/pub/software/scm/git/docs/everyday.html + +from the Git homepage. For more in-depth git documentation, see the +resources on the Git community documentation page: + + http://git-scm.com/documentation + +Pixman uses the infrastructure from the freedesktop.org umbrella +project.
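A note on the Makefile hunk above: pixman-mmx.c and pixman-sse2.c get their own build rules with -mmmx and -msse2 because objects compiled with SIMD flags must only be entered after a runtime CPU check; all other objects keep the plain CFLAGS, and pixman-x86.c selects an implementation at runtime. The sketch below illustrates that dispatch pattern only: it uses GCC's __builtin_cpu_supports() rather than pixman's own cpuid probing, and both composite_* functions are made-up stand-ins, not pixman API.

    #include <stdio.h>

    /* Stand-in for the generic C path (built without SIMD flags). */
    static void composite_generic (void) { puts ("generic path"); }

    /* Stand-in for code that would live in the -msse2 object. */
    static void composite_sse2 (void) { puts ("sse2 path"); }

    int
    main (void)
    {
        /* Branch into SSE2-compiled code only after the runtime check,
         * which is why pixman-sse2.o needs a separate build rule. */
        if (__builtin_cpu_supports ("sse2"))
            composite_sse2 ();
        else
            composite_generic ();

        return 0;
    }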
For instructions about how to use the git service on +freedesktop.org, see: + + http://www.freedesktop.org/wiki/Infrastructure/git/Developers + +The Pixman master repository can be found at: git://anongit.freedesktop.org/git/pixman - http://gitweb.freedesktop.org/?p=pixman;a=summary +and browsed on the web here: -For more information on the git code manager, see: + http://cgit.freedesktop.org/pixman/ - http://wiki.x.org/wiki/GitPage + +Sending patches +--------------- + +The general workflow for sending patches is to first make sure that +git can send mail on your system. Then, + + - create a branch off of master in your local git repository + + - make your changes as one or more commits + + - use the + + git send-email + + command to send the patch series to pixman@lists.freedesktop.org. + +In order for your patches to be accepted, please consider the +following guidelines: + + - This link: + + http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series + + describes what a good patch series is, and how to create one with + git. + + - At each point in the series, pixman should compile and the test + suite should pass. + + The exception here is if you are changing the test suite to + demonstrate a bug. In this case, make one commit that makes the + test suite fail due to the bug, and then another commit that fixes + the bug. + + You can run the test suite with + + make check + + It will take around two minutes to run on a modern PC. + + - Follow the coding style described in the CODING_STYLE file + + - For bug fixes, include an update to the test suite to make sure + the bug doesn't reappear. + + - For new features, add tests of the feature to the test + suite. Also, add a program demonstrating the new feature to the + demos/ directory. + + - Write descriptive commit messages. Useful information to include: + - Benchmark results, before and after + - Description of the bug that was fixed + - Detailed rationale for any new API + - Alternative approaches that were rejected (and why they + don't work) + - If review comments were incorporated, a brief version + history describing what those changes were. + + - For big patch series, send an introductory email with an overall + description of the patch series, including benchmarks and + motivation. Each commit message should still be descriptive and + include enough information to understand why this particular commit + was necessary. + +Pixman has high standards for code quality and so almost everybody +should expect to have the first versions of their patches rejected. + +If you think that the reviewers are wrong about something, or that the +guidelines above are wrong, feel free to discuss the issue on the +list. The purpose of the guidelines and code review is to ensure high +code quality; it is not an exercise in compliance. diff --git a/programs/develop/libraries/pixman/config.h b/programs/develop/libraries/pixman/config.h index c57fa62ed3..d7651c76a9 100644 --- a/programs/develop/libraries/pixman/config.h +++ b/programs/develop/libraries/pixman/config.h @@ -10,6 +10,15 @@ /* Define to 1 if you have the <dlfcn.h> header file. */ /* #undef HAVE_DLFCN_H */ +/* Whether we have feenableexcept() */ +/* #undef HAVE_FEENABLEEXCEPT */ + +/* Define to 1 if we have <fenv.h> */ +#define HAVE_FENV_H 1 + +/* Whether the tool chain supports __float128 */ +#define HAVE_FLOAT128 /**/ + /* Define to 1 if you have the `getisax' function. */ /* #undef HAVE_GETISAX */ @@ -25,9 +34,15 @@ /* Define to 1 if you have the `pixman-1' library (-lpixman-1).
*/ /* #undef HAVE_LIBPIXMAN_1 */ +/* Whether we have libpng */ +/* #undef HAVE_LIBPNG */ + /* Define to 1 if you have the <memory.h> header file. */ #define HAVE_MEMORY_H 1 +/* Whether we have mmap() */ +#define HAVE_MMAP + /* Whether we have mprotect() */ #define HAVE_MPROTECT 1 @@ -72,13 +87,13 @@ #define PACKAGE "pixman" /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT ""pixman@lists.freedesktop.org"" +#define PACKAGE_BUGREPORT "pixman@lists.freedesktop.org" /* Define to the full name of this package. */ #define PACKAGE_NAME "pixman" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "pixman 0.20.2" +#define PACKAGE_STRING "pixman 0.30.2" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pixman" @@ -87,7 +102,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "0.20.2" +#define PACKAGE_VERSION "0.30.2" /* enable TIMER_BEGIN/TIMER_END macros */ /* #undef PIXMAN_TIMERS */ @@ -98,8 +113,14 @@ /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 -/* Whether the tool chain supports __thread */ -//#define TOOLCHAIN_SUPPORTS__THREAD /**/ +/* The compiler supported TLS storage class */ +#define TLS __thread + +/* Whether the tool chain supports __attribute__((constructor)) */ +#define TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR /**/ + +/* use ARM IWMMXT compiler intrinsics */ +/* #undef USE_ARM_IWMMXT */ /* use ARM NEON assembly optimizations */ /* #undef USE_ARM_NEON */ @@ -110,20 +131,26 @@ /* use GNU-style inline assembler */ #define USE_GCC_INLINE_ASM 1 -/* use MMX compiler intrinsics */ -#define USE_MMX 1 +/* use Loongson Multimedia Instructions */ +/* #undef USE_LOONGSON_MMI */ + +/* use MIPS DSPr2 assembly optimizations */ +/* #undef USE_MIPS_DSPR2 */ /* use OpenMP in the test suite */ -//#define USE_OPENMP 1 +/* #undef USE_OPENMP */ /* use SSE2 compiler intrinsics */ -//#define USE_SSE2 1 +#define USE_SSE2 1 /* use VMX compiler intrinsics */ /* #undef USE_VMX */ +/* use x86 MMX compiler intrinsics */ +#define USE_X86_MMX 1 + /* Version number of package */ -#define VERSION "0.20.2" +#define VERSION "0.30.2" /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel). */ @@ -142,3 +169,6 @@ #ifndef __cplusplus /* #undef inline */ #endif + +/* Define to sqrt if you do not have the `sqrtf' function.
*/ +/* #undef sqrtf */ diff --git a/programs/develop/libraries/pixman/pixman-1.def b/programs/develop/libraries/pixman/pixman-1.def new file mode 100644 index 0000000000..ee57353797 --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-1.def @@ -0,0 +1,148 @@ +EXPORTS + _pixman_internal_only_get_implementation + pixman_add_trapezoids + pixman_add_traps + pixman_add_triangles + pixman_blt + pixman_composite_glyphs + pixman_composite_glyphs_no_mask + pixman_composite_trapezoids + pixman_composite_triangles + pixman_compute_composite_region + pixman_disable_out_of_bounds_workaround + pixman_edge_init + pixman_edge_step + pixman_f_transform_bounds + pixman_f_transform_from_pixman_transform + pixman_f_transform_init_identity + pixman_f_transform_init_rotate + pixman_f_transform_init_scale + pixman_f_transform_init_translate + pixman_f_transform_invert + pixman_f_transform_multiply + pixman_f_transform_point + pixman_f_transform_point_3d + pixman_f_transform_rotate + pixman_f_transform_scale + pixman_f_transform_translate + pixman_fill + pixman_filter_create_separable_convolution + pixman_format_supported_destination + pixman_format_supported_source + pixman_glyph_cache_create + pixman_glyph_cache_destroy + pixman_glyph_cache_freeze + pixman_glyph_cache_insert + pixman_glyph_cache_lookup + pixman_glyph_cache_remove + pixman_glyph_cache_thaw + pixman_glyph_get_extents + pixman_glyph_get_mask_format + pixman_image_composite + pixman_image_composite32 + pixman_image_create_bits + pixman_image_create_bits_no_clear + pixman_image_create_conical_gradient + pixman_image_create_linear_gradient + pixman_image_create_radial_gradient + pixman_image_create_solid_fill + pixman_image_fill_boxes + pixman_image_fill_rectangles + pixman_image_get_component_alpha + pixman_image_get_data + pixman_image_get_depth + pixman_image_get_destroy_data + pixman_image_get_format + pixman_image_get_height + pixman_image_get_stride + pixman_image_get_width + pixman_image_ref + pixman_image_set_accessors + pixman_image_set_alpha_map + pixman_image_set_clip_region + pixman_image_set_clip_region32 + pixman_image_set_component_alpha + pixman_image_set_destroy_function + pixman_image_set_filter + pixman_image_set_has_client_clip + pixman_image_set_indexed + pixman_image_set_repeat + pixman_image_set_source_clipping + pixman_image_set_transform + pixman_image_unref + pixman_line_fixed_edge_init + pixman_rasterize_edges + pixman_rasterize_trapezoid + pixman_region32_clear + pixman_region32_contains_point + pixman_region32_contains_rectangle + pixman_region32_copy + pixman_region32_equal + pixman_region32_extents + pixman_region32_fini + pixman_region32_init + pixman_region32_init_from_image + pixman_region32_init_rect + pixman_region32_init_rects + pixman_region32_init_with_extents + pixman_region32_intersect + pixman_region32_intersect_rect + pixman_region32_inverse + pixman_region32_n_rects + pixman_region32_not_empty + pixman_region32_rectangles + pixman_region32_reset + pixman_region32_selfcheck + pixman_region32_subtract + pixman_region32_translate + pixman_region32_union + pixman_region32_union_rect + pixman_region_clear + pixman_region_contains_point + pixman_region_contains_rectangle + pixman_region_copy + pixman_region_equal + pixman_region_extents + pixman_region_fini + pixman_region_init + pixman_region_init_from_image + pixman_region_init_rect + pixman_region_init_rects + pixman_region_init_with_extents + pixman_region_intersect + pixman_region_intersect_rect + pixman_region_inverse + pixman_region_n_rects + 
pixman_region_not_empty + pixman_region_rectangles + pixman_region_reset + pixman_region_selfcheck + pixman_region_set_static_pointers + pixman_region_subtract + pixman_region_translate + pixman_region_union + pixman_region_union_rect + pixman_sample_ceil_y + pixman_sample_floor_y + pixman_transform_bounds + pixman_transform_from_pixman_f_transform + pixman_transform_init_identity + pixman_transform_init_rotate + pixman_transform_init_scale + pixman_transform_init_translate + pixman_transform_invert + pixman_transform_is_identity + pixman_transform_is_int_translate + pixman_transform_is_inverse + pixman_transform_is_scale + pixman_transform_multiply + pixman_transform_point + pixman_transform_point_31_16 + pixman_transform_point_31_16_3d + pixman_transform_point_31_16_affine + pixman_transform_point_3d + pixman_transform_rotate + pixman_transform_scale + pixman_transform_translate + pixman_version + pixman_version_string diff --git a/programs/develop/libraries/pixman/pixman-access.c b/programs/develop/libraries/pixman/pixman-access.c index f1ce0ba400..b5c8e4017a 100644 --- a/programs/develop/libraries/pixman/pixman-access.c +++ b/programs/develop/libraries/pixman/pixman-access.c @@ -31,9 +31,10 @@ #include <config.h> #include <stdlib.h> #include <string.h> +#include <assert.h> -#include "pixman-private.h" #include "pixman-accessor.h" +#include "pixman-private.h" #define CONVERT_RGB24_TO_Y15(s) \ (((((s) >> 16) & 0xff) * 153 + \ @@ -45,14 +46,119 @@ (((s) >> 6) & 0x03e0) | \ (((s) >> 9) & 0x7c00)) -#define RGB15_TO_ENTRY(mif,rgb15) \ - ((mif)->ent[rgb15]) +/* Fetch macros */ -#define RGB24_TO_ENTRY(mif,rgb24) \ - RGB15_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24)) +#ifdef WORDS_BIGENDIAN +#define FETCH_1(img,l,o) \ + (((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1) +#else +#define FETCH_1(img,l,o) \ + ((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1) +#endif -#define RGB24_TO_ENTRY_Y(mif,rgb24) \ - ((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)]) +#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) + +#ifdef WORDS_BIGENDIAN +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) +#else +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) +#endif + +#ifdef WORDS_BIGENDIAN +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) +#else +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) +#endif + +/* Store macros */ + +#ifdef WORDS_BIGENDIAN +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << (0x1f - ((o) & 0x1f)); \ + __v = (v)? __m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#else +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << ((o) & 0x1f); \ + __v = (v)?
__m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#endif + +#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) + +#ifdef WORDS_BIGENDIAN +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ + } while (0) +#else +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? \ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ + } while (0) +#endif + +#ifdef WORDS_BIGENDIAN +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + } \ + while (0) +#else +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + } \ + while (0) +#endif /* * YV12 setup and access macros @@ -86,972 +192,543 @@ ((uint8_t *) ((bits) + offset0 + \ ((stride) >> 1) * ((line) >> 1))) -/********************************** Fetch ************************************/ +/* Misc. helpers */ -static void -fetch_scanline_a8r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline void +get_shifts (pixman_format_code_t format, + int *a, + int *r, + int *g, + int *b) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - - MEMCPY_WRAPPED (image, - buffer, (const uint32_t *)bits + x, - width * sizeof(uint32_t)); -} - -static void -fetch_scanline_x8r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (const uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - *buffer++ = READ (image, pixel++) | 0xff000000; -} - -static void -fetch_scanline_a8b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) + switch (PIXMAN_FORMAT_TYPE (format)) { - uint32_t p = READ (image, pixel++); - - *buffer++ = (p & 0xff00ff00) | - ((p >> 16) & 0xff) | - ((p & 0xff) << 16); + case PIXMAN_TYPE_A: + *b = 0; + *g = 0; + *r = 0; + *a = 0; + break; + + case PIXMAN_TYPE_ARGB: + case PIXMAN_TYPE_ARGB_SRGB: + *b = 0; + *g = *b + PIXMAN_FORMAT_B (format); + *r = *g + PIXMAN_FORMAT_G (format); + *a = *r + PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_ABGR: + *r = 0; + *g = *r + PIXMAN_FORMAT_R (format); + *b = *g + PIXMAN_FORMAT_G (format); + *a = *b + PIXMAN_FORMAT_B (format); + break; + + case PIXMAN_TYPE_BGRA: + /* With BGRA formats we start counting at the high end of the pixel */ + *b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format); + *g = *b - PIXMAN_FORMAT_B (format); + *r = *g - PIXMAN_FORMAT_G (format); + *a = *r - PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_RGBA: + /* With BGRA formats we start counting at the 
high end of the pixel */ + *r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format); + *g = *r - PIXMAN_FORMAT_R (format); + *b = *g - PIXMAN_FORMAT_G (format); + *a = *b - PIXMAN_FORMAT_B (format); + break; + + default: + assert (0); + break; } } -static void -fetch_scanline_x8b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline uint32_t +convert_channel (uint32_t pixel, uint32_t def_value, + int n_from_bits, int from_shift, + int n_to_bits, int to_shift) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = 0xff000000 | - (p & 0x0000ff00) | - ((p >> 16) & 0xff) | - ((p & 0xff) << 16); - } + uint32_t v; + + if (n_from_bits && n_to_bits) + v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits); + else if (n_to_bits) + v = def_value; + else + v = 0; + + return (v & ((1 << n_to_bits) - 1)) << to_shift; } -static void -fetch_scanline_b8g8r8a8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = (((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8) | - ((p & 0x000000ff) << 24)); - } -} - -static void -fetch_scanline_b8g8r8x8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = (0xff000000 | - ((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8)); - } -} - -static void -fetch_scanline_x14r6g6b6 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (const uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x3f000) << 6) | ((p & 0x30000)); - g = ((p & 0x00fc0) << 4) | ((p & 0x00c00) >> 2); - b = ((p & 0x0003f) << 2) | ((p & 0x00030) >> 4); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -/* Expects a uint64_t buffer */ -static void -fetch_scanline_a2r10g10b10 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = bits + x; - const uint32_t *end = pixel + width; - uint64_t *buffer = (uint64_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t a = p >> 30; - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - a <<= 14; - a |= a >> 2; - a |= a >> 4; - a |= a >> 8; - - *buffer++ = a << 48 | r << 32 | g << 16 | b; - } -} - -/* Expects a uint64_t buffer */ -static void -fetch_scanline_x2r10g10b10 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const 
uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - uint64_t *buffer = (uint64_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; - } -} - -/* Expects a uint64_t buffer */ -static void -fetch_scanline_a2b10g10r10 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = bits + x; - const uint32_t *end = pixel + width; - uint64_t *buffer = (uint64_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t a = p >> 30; - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - a <<= 14; - a |= a >> 2; - a |= a >> 4; - a |= a >> 8; - - *buffer++ = a << 48 | r << 32 | g << 16 | b; - } -} - -/* Expects a uint64_t buffer */ -static void -fetch_scanline_x2b10g10r10 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - uint64_t *buffer = (uint64_t *)b; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; - } -} - -static void -fetch_scanline_r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3 * x; - const uint8_t *end = pixel + 3 * width; - - while (pixel < end) - { - uint32_t b = 0xff000000; - -#ifdef WORDS_BIGENDIAN - b |= (READ (image, pixel++) << 16); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++)); -#else - b |= (READ (image, pixel++)); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++) << 16); -#endif - - *buffer++ = b; - } -} - -static void -fetch_scanline_b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3 * x; - const uint8_t *end = pixel + 3 * width; - - while (pixel < end) - { - uint32_t b = 0xff000000; -#ifdef WORDS_BIGENDIAN - b |= (READ (image, pixel++)); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++) << 16); -#else - b |= (READ (image, pixel++) << 16); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++)); -#endif - *buffer++ = b; - } -} - -static void -fetch_scanline_r5g6b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, 
pixel++); - uint32_t r = (((p) << 3) & 0xf8) | - (((p) << 5) & 0xfc00) | - (((p) << 8) & 0xf80000); - - r |= (r >> 5) & 0x70007; - r |= (r >> 6) & 0x300; - - *buffer++ = 0xff000000 | r; - } -} - -static void -fetch_scanline_b5g6r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8; - g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1r5g5b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b, a; - - a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; - r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x1r5g5b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1b5g5r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - uint32_t r, g, b, a; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; - b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x1b5g5r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a4r4g4b4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + 
width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b, a; - - a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; - r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - b = ((p & 0x000f) | ((p & 0x000f) << 4)); - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x4r4g4b4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - b = ((p & 0x000f) | ((p & 0x000f) << 4)); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a4b4g4r4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b, a; - - a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; - b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x4b4g4r4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - *buffer++ = READ (image, pixel++) << 24; -} - -static void -fetch_scanline_r3g3b2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16; - g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8; - b = (((p & 0x03) ) | - ((p & 0x03) << 2) | - ((p & 0x03) << 4) | - ((p & 0x03) << 6)); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_b2g3r3 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - 
b = p & 0xc0; - b |= b >> 2; - b |= b >> 4; - b &= 0xff; - - g = (p & 0x38) << 10; - g |= g >> 3; - g |= g >> 6; - g &= 0xff00; - - r = (p & 0x7) << 21; - r |= r >> 3; - r |= r >> 6; - r &= 0xff0000; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a2r2g2b2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t a, r, g, b; - - a = ((p & 0xc0) * 0x55) << 18; - r = ((p & 0x30) * 0x55) << 12; - g = ((p & 0x0c) * 0x55) << 6; - b = ((p & 0x03) * 0x55); - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_a2b2g2r2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t a, r, g, b; - - a = ((p & 0xc0) * 0x55) << 18; - b = ((p & 0x30) * 0x55) >> 4; - g = ((p & 0x0c) * 0x55) << 6; - r = ((p & 0x03) * 0x55) << 16; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_c8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = indexed->rgba[p]; - } -} - -static void -fetch_scanline_x4a4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint8_t p = READ (image, pixel++) & 0xf; - - *buffer++ = (p | (p << 4)) << 24; - } -} - -#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) -#ifdef WORDS_BIGENDIAN -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) -#else -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? 
(FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) -#endif - -static void -fetch_scanline_a4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - - p |= p << 4; - - *buffer++ = p << 24; - } -} - -static void -fetch_scanline_r1g2b1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t r, g, b; - - r = ((p & 0x8) * 0xff) << 13; - g = ((p & 0x6) * 0x55) << 7; - b = ((p & 0x1) * 0xff); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_b1g2r1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t r, g, b; - - b = ((p & 0x8) * 0xff) >> 3; - g = ((p & 0x6) * 0x55) << 7; - r = ((p & 0x1) * 0xff) << 16; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1r1g1b1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline uint32_t +convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel) { + int a_from_shift, r_from_shift, g_from_shift, b_from_shift; + int a_to_shift, r_to_shift, g_to_shift, b_to_shift; uint32_t a, r, g, b; - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - for (i = 0; i < width; ++i) + get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift); + get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift); + + a = convert_channel (pixel, ~0, + PIXMAN_FORMAT_A (from), a_from_shift, + PIXMAN_FORMAT_A (to), a_to_shift); + + r = convert_channel (pixel, 0, + PIXMAN_FORMAT_R (from), r_from_shift, + PIXMAN_FORMAT_R (to), r_to_shift); + + g = convert_channel (pixel, 0, + PIXMAN_FORMAT_G (from), g_from_shift, + PIXMAN_FORMAT_G (to), g_to_shift); + + b = convert_channel (pixel, 0, + PIXMAN_FORMAT_B (from), b_from_shift, + PIXMAN_FORMAT_B (to), b_to_shift); + + return a | r | g | b; +} + +static force_inline uint32_t +convert_pixel_to_a8r8g8b8 (pixman_image_t *image, + pixman_format_code_t format, + uint32_t pixel) +{ + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || + PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) { - uint32_t p = FETCH_4 (image, bits, i + x); - - a = ((p & 0x8) * 0xff) << 21; - r = ((p & 0x4) * 0xff) << 14; - g = ((p & 0x2) * 0xff) << 7; - b = ((p & 0x1) * 0xff); - - *buffer++ = a | r | g | b; + return image->bits.indexed->rgba[pixel]; + } + else + { + return convert_pixel (format, PIXMAN_a8r8g8b8, pixel); } } -static void -fetch_scanline_a1b1g1r1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline uint32_t +convert_pixel_from_a8r8g8b8 (pixman_image_t *image, + pixman_format_code_t format, uint32_t pixel) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t a, r, g, 
b; + pixel = CONVERT_RGB24_TO_Y15 (pixel); - a = ((p & 0x8) * 0xff) << 21; - b = ((p & 0x4) * 0xff) >> 2; - g = ((p & 0x2) * 0xff) << 7; - r = ((p & 0x1) * 0xff) << 16; + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) + { + pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel); - *buffer++ = a | r | g | b; + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else + { + return convert_pixel (PIXMAN_a8r8g8b8, format, pixel); } } -static void -fetch_scanline_c4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline uint32_t +fetch_and_convert_pixel (pixman_image_t * image, + const uint8_t * bits, + int offset, + pixman_format_code_t format) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - int i; - - for (i = 0; i < width; ++i) + uint32_t pixel; + + switch (PIXMAN_FORMAT_BPP (format)) { - uint32_t p = FETCH_4 (image, bits, i + x); - - *buffer++ = indexed->rgba[p]; + case 1: + pixel = FETCH_1 (image, bits, offset); + break; + + case 4: + pixel = FETCH_4 (image, bits, offset); + break; + + case 8: + pixel = READ (image, bits + offset); + break; + + case 16: + pixel = READ (image, ((uint16_t *)bits + offset)); + break; + + case 24: + pixel = FETCH_24 (image, bits, offset); + break; + + case 32: + pixel = READ (image, ((uint32_t *)bits + offset)); + break; + + default: + pixel = 0xffff00ff; /* As ugly as possible to detect the bug */ + break; + } + + return convert_pixel_to_a8r8g8b8 (image, format, pixel); +} + +static force_inline void +convert_and_store_pixel (bits_image_t * image, + uint8_t * dest, + int offset, + pixman_format_code_t format, + uint32_t pixel) +{ + uint32_t converted = convert_pixel_from_a8r8g8b8 ( + (pixman_image_t *)image, format, pixel); + + switch (PIXMAN_FORMAT_BPP (format)) + { + case 1: + STORE_1 (image, dest, offset, converted & 0x01); + break; + + case 4: + STORE_4 (image, dest, offset, converted & 0xf); + break; + + case 8: + WRITE (image, (dest + offset), converted & 0xff); + break; + + case 16: + WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff); + break; + + case 24: + STORE_24 (image, dest, offset, converted); + break; + + case 32: + WRITE (image, ((uint32_t *)dest + offset), converted); + break; + + default: + *dest = 0x0; + break; } } -static void -fetch_scanline_a1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +#define MAKE_ACCESSORS(format) \ + static void \ + fetch_scanline_ ## format (pixman_image_t *image, \ + int x, \ + int y, \ + int width, \ + uint32_t * buffer, \ + const uint32_t *mask) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits.bits + y * image->bits.rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + *buffer++ = \ + fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \ + } \ + } \ + \ + static void \ + store_scanline_ ## format (bits_image_t * image, \ + int x, \ + int y, \ + int width, \ + const uint32_t *values) \ + { \ + uint8_t *dest = \ + (uint8_t *)(image->bits + y * image->rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + convert_and_store_pixel ( \ + image, dest, i + x, PIXMAN_ ## format, values[i]); \ + } \ + } \ + \ + static uint32_t \ + fetch_pixel_ ## format (bits_image_t *image, \ + int offset, \ + int line) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits + line * 
image->rowstride); \ + \ + return fetch_and_convert_pixel ((pixman_image_t *)image, \ + bits, offset, PIXMAN_ ## format); \ + } \ + \ + static const void *const __dummy__ ## format + +MAKE_ACCESSORS(a8r8g8b8); +MAKE_ACCESSORS(x8r8g8b8); +MAKE_ACCESSORS(a8b8g8r8); +MAKE_ACCESSORS(x8b8g8r8); +MAKE_ACCESSORS(x14r6g6b6); +MAKE_ACCESSORS(b8g8r8a8); +MAKE_ACCESSORS(b8g8r8x8); +MAKE_ACCESSORS(r8g8b8x8); +MAKE_ACCESSORS(r8g8b8a8); +MAKE_ACCESSORS(r8g8b8); +MAKE_ACCESSORS(b8g8r8); +MAKE_ACCESSORS(r5g6b5); +MAKE_ACCESSORS(b5g6r5); +MAKE_ACCESSORS(a1r5g5b5); +MAKE_ACCESSORS(x1r5g5b5); +MAKE_ACCESSORS(a1b5g5r5); +MAKE_ACCESSORS(x1b5g5r5); +MAKE_ACCESSORS(a4r4g4b4); +MAKE_ACCESSORS(x4r4g4b4); +MAKE_ACCESSORS(a4b4g4r4); +MAKE_ACCESSORS(x4b4g4r4); +MAKE_ACCESSORS(a8); +MAKE_ACCESSORS(c8); +MAKE_ACCESSORS(g8); +MAKE_ACCESSORS(r3g3b2); +MAKE_ACCESSORS(b2g3r3); +MAKE_ACCESSORS(a2r2g2b2); +MAKE_ACCESSORS(a2b2g2r2); +MAKE_ACCESSORS(x4a4); +MAKE_ACCESSORS(a4); +MAKE_ACCESSORS(g4); +MAKE_ACCESSORS(c4); +MAKE_ACCESSORS(r1g2b1); +MAKE_ACCESSORS(b1g2r1); +MAKE_ACCESSORS(a1r1g1b1); +MAKE_ACCESSORS(a1b1g1r1); +MAKE_ACCESSORS(a1); +MAKE_ACCESSORS(g1); + +/********************************** Fetch ************************************/ +/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded + * floating point numbers. We assume that single precision + * floating point follows the IEEE 754 format. + */ +static const uint32_t to_linear_u[256] = { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) + 0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61, + 0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a, + 0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d, + 0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152, + 0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43, + 0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e, + 0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601, + 0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9, + 0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae, + 0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380, + 0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466, + 0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65, + 0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48, + 0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728, + 0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4, + 0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16, + 0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f, + 0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50, + 0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a, + 0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702, + 0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027, + 0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf, + 0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0, + 0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281, + 0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0, + 0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a, + 0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 
0x3eb67a15, + 0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14, + 0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508, + 0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64, + 0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594, + 0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8, + 0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65, + 0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237, + 0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c, + 0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680, + 0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24, + 0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e, + 0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8, + 0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de, + 0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6, + 0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae, + 0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000 +}; + +static const float * const to_linear = (const float *)to_linear_u; + +static uint8_t +to_srgb (float f) +{ + uint8_t low = 0; + uint8_t high = 255; + + while (high - low > 1) { - uint32_t p = READ (image, bits + ((i + x) >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = p >> (0x1f - ((i + x) & 0x1f)); -#else - a = p >> ((i + x) & 0x1f); -#endif - a = a & 1; - a |= a << 1; - a |= a << 2; - a |= a << 4; - - *buffer++ = a << 24; + uint8_t mid = (low + high) / 2; + + if (to_linear[mid] > f) + high = mid; + else + low = mid; } + + if (to_linear[high] - f < f - to_linear[low]) + return high; + else + return low; } static void -fetch_scanline_g1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) { const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - int i; - - for (i = 0; i < width; ++i) + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) { - uint32_t p = READ (image, bits + ((i + x) >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = p >> (0x1f - ((i + x) & 0x1f)); -#else - a = p >> ((i + x) & 0x1f); -#endif - a = a & 1; - - *buffer++ = indexed->rgba[a]; + uint32_t p = READ (image, pixel++); + argb_t *argb = buffer; + + argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); + + argb->r = to_linear [(p >> 16) & 0xff]; + argb->g = to_linear [(p >> 8) & 0xff]; + argb->b = to_linear [(p >> 0) & 0xff]; + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_a2r10g10b10_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + buffer->a = pixman_unorm_to_float (a, 2); + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = 
pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_x2r10g10b10_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + buffer->a = 1.0; + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_a2b10g10r10_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + buffer->a = pixman_unorm_to_float (a, 2); + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; + } +} + +/* Expects a float buffer */ +static void +fetch_scanline_x2b10g10r10_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + buffer->a = 1.0; + buffer->r = pixman_unorm_to_float (r, 10); + buffer->g = pixman_unorm_to_float (g, 10); + buffer->b = pixman_unorm_to_float (b, 10); + + buffer++; } } @@ -1128,11 +805,30 @@ fetch_scanline_yv12 (pixman_image_t *image, /**************************** Pixel wise fetching *****************************/ -/* Despite the type, expects a uint64_t buffer */ -static uint64_t -fetch_pixel_a2r10g10b10 (bits_image_t *image, - int offset, - int line) +static argb_t +fetch_pixel_x2r10g10b10_float (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + argb_t argb; + + argb.a = 1.0; + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; +} + +static argb_t +fetch_pixel_a2r10g10b10_float (bits_image_t *image, + int offset, + int line) { uint32_t *bits = image->bits + line * image->rowstride; uint32_t p = READ (image, bits + offset); @@ -1140,43 +836,20 @@ fetch_pixel_a2r10g10b10 (bits_image_t *image, uint64_t r = (p >> 20) & 0x3ff; uint64_t g = (p >> 10) & 0x3ff; uint64_t b = p & 0x3ff; + argb_t argb; - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; + argb.a = pixman_unorm_to_float (a, 2); + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float 
(b, 10); - a <<= 14; - a |= a >> 2; - a |= a >> 4; - a |= a >> 8; - - return a << 48 | r << 32 | g << 16 | b; + return argb; } -/* Despite the type, this function expects a uint64_t buffer */ -static uint64_t -fetch_pixel_x2r10g10b10 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, bits + offset); - uint64_t r = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t b = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - return 0xffffULL << 48 | r << 32 | g << 16 | b; -} - -/* Despite the type, expects a uint64_t buffer */ -static uint64_t -fetch_pixel_a2b10g10r10 (bits_image_t *image, - int offset, - int line) +static argb_t +fetch_pixel_a2b10g10r10_float (bits_image_t *image, + int offset, + int line) { uint32_t *bits = image->bits + line * image->rowstride; uint32_t p = READ (image, bits + offset); @@ -1184,584 +857,52 @@ fetch_pixel_a2b10g10r10 (bits_image_t *image, uint64_t b = (p >> 20) & 0x3ff; uint64_t g = (p >> 10) & 0x3ff; uint64_t r = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - a <<= 14; - a |= a >> 2; - a |= a >> 4; - a |= a >> 8; - - return a << 48 | r << 32 | g << 16 | b; + argb_t argb; + + argb.a = pixman_unorm_to_float (a, 2); + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; } -/* Despite the type, this function expects a uint64_t buffer */ -static uint64_t -fetch_pixel_x2b10g10r10 (bits_image_t *image, - int offset, - int line) +static argb_t +fetch_pixel_x2b10g10r10_float (bits_image_t *image, + int offset, + int line) { uint32_t *bits = image->bits + line * image->rowstride; uint32_t p = READ (image, bits + offset); uint64_t b = (p >> 20) & 0x3ff; uint64_t g = (p >> 10) & 0x3ff; uint64_t r = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - return 0xffffULL << 48 | r << 32 | g << 16 | b; + argb_t argb; + + argb.a = 1.0; + argb.r = pixman_unorm_to_float (r, 10); + argb.g = pixman_unorm_to_float (g, 10); + argb.b = pixman_unorm_to_float (b, 10); + + return argb; } -static uint32_t -fetch_pixel_a8r8g8b8 (bits_image_t *image, - int offset, - int line) +static argb_t +fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image, + int offset, + int line) { uint32_t *bits = image->bits + line * image->rowstride; - return READ (image, (uint32_t *)bits + offset); -} + uint32_t p = READ (image, bits + offset); + argb_t argb; -static uint32_t -fetch_pixel_x8r8g8b8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; + argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8); - return READ (image, (uint32_t *)bits + offset) | 0xff000000; -} + argb.r = to_linear [(p >> 16) & 0xff]; + argb.g = to_linear [(p >> 8) & 0xff]; + argb.b = to_linear [(p >> 0) & 0xff]; -static uint32_t -fetch_pixel_a8b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((pixel & 0xff000000) | - ((pixel >> 16) & 0xff) | - (pixel & 0x0000ff00) | - ((pixel & 0xff) << 16)); -} - -static uint32_t -fetch_pixel_x8b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((0xff000000) | - ((pixel >> 16) & 0xff) | - 
(pixel & 0x0000ff00) | - ((pixel & 0xff) << 16)); -} - -static uint32_t -fetch_pixel_b8g8r8a8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((pixel & 0xff000000) >> 24 | - (pixel & 0x00ff0000) >> 8 | - (pixel & 0x0000ff00) << 8 | - (pixel & 0x000000ff) << 24); -} - -static uint32_t -fetch_pixel_b8g8r8x8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((0xff000000) | - (pixel & 0xff000000) >> 24 | - (pixel & 0x00ff0000) >> 8 | - (pixel & 0x0000ff00) << 8); -} - -static uint32_t -fetch_pixel_x14r6g6b6 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x3f000) << 6) | ((pixel & 0x30000)); - g = ((pixel & 0x00fc0) << 4) | ((pixel & 0x00c00) >> 2); - b = ((pixel & 0x0003f) << 2) | ((pixel & 0x00030) >> 4); - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_r8g8b8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); - -#ifdef WORDS_BIGENDIAN - return (0xff000000 | - (READ (image, pixel + 0) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 2))); -#else - return (0xff000000 | - (READ (image, pixel + 2) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 0))); -#endif -} - -static uint32_t -fetch_pixel_b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); -#ifdef WORDS_BIGENDIAN - return (0xff000000 | - (READ (image, pixel + 2) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 0))); -#else - return (0xff000000 | - (READ (image, pixel + 0) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 2))); -#endif -} - -static uint32_t -fetch_pixel_r5g6b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8; - g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_b5g6r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t r, g, b; - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - - b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8; - g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a1r5g5b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24; - r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (a | r | g | b); -} - 
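/* Editor's note -- illustrative sketch, not part of the patch: each of the
 * per-format fetchers deleted in this hunk widens its narrow channels with
 * the same bit-replication trick (OR-ing a shifted copy of the channel's
 * top bits into the gap below it, e.g. the "(pixel & 0x7000) >> 5" terms
 * above), which the new generic fetch_and_convert_pixel ()/convert_pixel ()
 * path centralizes. The helper below, expand_to_8 (a hypothetical name,
 * not a pixman function), shows the idea in isolation, assuming plain C99
 * and a channel width of 1..8 bits. */

#include <stdint.h>
#include <stdio.h>

/* Widen an n-bit channel to 8 bits by replicating its bits downward, so
 * that 0 stays 0 and the channel's maximum maps to exactly 0xff:
 * 5-bit 0b10110 -> 0b10110101. */
static uint8_t
expand_to_8 (uint32_t v, int n)
{
    uint32_t r = v << (8 - n);  /* left-align the channel */
    int filled = n;

    while (filled < 8)
    {
        r |= r >> filled;       /* copy the top bits into the gap */
        filled *= 2;
    }
    return (uint8_t) r;
}

int
main (void)
{
    /* An x1r5g5b5 pixel with all channel bits set expands to 0xffffff,
     * matching what the mask-based fetchers above compute. */
    uint16_t p = 0x7fff;

    printf ("%02x%02x%02x\n",
            expand_to_8 ((p >> 10) & 0x1f, 5),
            expand_to_8 ((p >> 5) & 0x1f, 5),
            expand_to_8 (p & 0x1f, 5));
    return 0;
}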
-static uint32_t -fetch_pixel_x1r5g5b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a1b5g5r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24; - b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x1b5g5r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a4r4g4b4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; - r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x4r4g4b4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a4b4g4r4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; - b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x4b4g4r4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - - return pixel << 24; -} - -static uint32_t -fetch_pixel_r3g3b2 (bits_image_t 
*image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0xe0) | - ((pixel & 0xe0) >> 3) | - ((pixel & 0xc0) >> 6)) << 16; - - g = ((pixel & 0x1c) | - ((pixel & 0x18) >> 3) | - ((pixel & 0x1c) << 3)) << 8; - - b = (((pixel & 0x03) ) | - ((pixel & 0x03) << 2) | - ((pixel & 0x03) << 4) | - ((pixel & 0x03) << 6)); - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_b2g3r3 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, (uint8_t *) bits + offset); - uint32_t r, g, b; - - b = p & 0xc0; - b |= b >> 2; - b |= b >> 4; - b &= 0xff; - - g = (p & 0x38) << 10; - g |= g >> 3; - g |= g >> 6; - g &= 0xff00; - - r = (p & 0x7) << 21; - r |= r >> 3; - r |= r >> 6; - r &= 0xff0000; - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_a2r2g2b2 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xc0) * 0x55) << 18; - r = ((pixel & 0x30) * 0x55) << 12; - g = ((pixel & 0x0c) * 0x55) << 6; - b = ((pixel & 0x03) * 0x55); - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_a2b2g2r2 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xc0) * 0x55) << 18; - b = ((pixel & 0x30) * 0x55) >> 4; - g = ((pixel & 0x0c) * 0x55) << 6; - r = ((pixel & 0x03) * 0x55) << 16; - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_c8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - const pixman_indexed_t * indexed = image->indexed; - - return indexed->rgba[pixel]; -} - -static uint32_t -fetch_pixel_x4a4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - - return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24; -} - -static uint32_t -fetch_pixel_a4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - - pixel |= pixel << 4; - return pixel << 24; -} - -static uint32_t -fetch_pixel_r1g2b1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t r, g, b; - - r = ((pixel & 0x8) * 0xff) << 13; - g = ((pixel & 0x6) * 0x55) << 7; - b = ((pixel & 0x1) * 0xff); - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_b1g2r1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t r, g, b; - - b = ((pixel & 0x8) * 0xff) >> 3; - g = ((pixel & 0x6) * 0x55) << 7; - r = ((pixel & 0x1) * 0xff) << 16; - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_a1r1g1b1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t a, r, g, b; - - a = ((pixel & 0x8) * 0xff) << 
21; - r = ((pixel & 0x4) * 0xff) << 14; - g = ((pixel & 0x2) * 0xff) << 7; - b = ((pixel & 0x1) * 0xff); - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_a1b1g1r1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t a, r, g, b; - - a = ((pixel & 0x8) * 0xff) << 21; - b = ((pixel & 0x4) * 0xff) >> 2; - g = ((pixel & 0x2) * 0xff) << 7; - r = ((pixel & 0x1) * 0xff) << 16; - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_c4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - const pixman_indexed_t * indexed = image->indexed; - - return indexed->rgba[pixel]; -} - -static uint32_t -fetch_pixel_a1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, bits + (offset >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = pixel >> (0x1f - (offset & 0x1f)); -#else - a = pixel >> (offset & 0x1f); -#endif - a = a & 1; - a |= a << 1; - a |= a << 2; - a |= a << 4; - - return a << 24; -} - -static uint32_t -fetch_pixel_g1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, bits + (offset >> 5)); - const pixman_indexed_t * indexed = image->indexed; - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = pixel >> (0x1f - (offset & 0x1f)); -#else - a = pixel >> (offset & 0x1f); -#endif - a = a & 1; - - return indexed->rgba[a]; + return argb; } static uint32_t @@ -1821,980 +962,276 @@ fetch_pixel_yv12 (bits_image_t *image, /*********************************** Store ************************************/ -#define SPLIT_A(v) \ - uint32_t a = ((v) >> 24), \ - r = ((v) >> 16) & 0xff, \ - g = ((v) >> 8) & 0xff, \ - b = (v) & 0xff - -#define SPLIT(v) \ - uint32_t r = ((v) >> 16) & 0xff, \ - g = ((v) >> 8) & 0xff, \ - b = (v) & 0xff - static void -store_scanline_a2r10g10b10 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) +store_scanline_a2r10g10b10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = bits + x; - uint64_t *values = (uint64_t *)v; + argb_t *values = (argb_t *)v; int i; - + for (i = 0; i < width; ++i) { + uint16_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 2); + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + WRITE (image, pixel++, - ((values[i] >> 32) & 0xc0000000) | - ((values[i] >> 18) & 0x3ff00000) | - ((values[i] >> 12) & 0xffc00) | - ((values[i] >> 6) & 0x3ff)); + (a << 30) | (r << 20) | (g << 10) | b); } } static void -store_scanline_x2r10g10b10 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint64_t *values = (uint64_t *)v; - uint32_t *pixel = bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 18) & 0x3ff00000) | - ((values[i] >> 12) & 0xffc00) | - ((values[i] >> 6) & 0x3ff)); - } -} - -static void -store_scanline_a2b10g10r10 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) +store_scanline_x2r10g10b10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { uint32_t *bits = 
image->bits + image->rowstride * y; uint32_t *pixel = bits + x; - uint64_t *values = (uint64_t *)v; + argb_t *values = (argb_t *)v; int i; - + for (i = 0; i < width; ++i) { + uint16_t r, g, b; + + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + WRITE (image, pixel++, - ((values[i] >> 32) & 0xc0000000) | - ((values[i] >> 38) & 0x3ff) | - ((values[i] >> 12) & 0xffc00) | - ((values[i] << 14) & 0x3ff00000)); + (r << 20) | (g << 10) | b); } } static void -store_scanline_x2b10g10r10 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *v) +store_scanline_a2b10g10r10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { uint32_t *bits = image->bits + image->rowstride * y; - uint64_t *values = (uint64_t *)v; uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; int i; - + for (i = 0; i < width; ++i) { + uint16_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 2); + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + WRITE (image, pixel++, - ((values[i] >> 38) & 0x3ff) | - ((values[i] >> 12) & 0xffc00) | - ((values[i] << 14) & 0x3ff00000)); + (a << 30) | (b << 20) | (g << 10) | r); } } static void -store_scanline_a8r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) +store_scanline_x2b10g10r10_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { uint32_t *bits = image->bits + image->rowstride * y; - - MEMCPY_WRAPPED (image, ((uint32_t *)bits) + x, values, - width * sizeof(uint32_t)); -} - -static void -store_scanline_x8r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, values[i] & 0xffffff); -} -static void -store_scanline_a8b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - for (i = 0; i < width; ++i) { + uint16_t r, g, b; + + r = pixman_float_to_unorm (values[i].r, 10); + g = pixman_float_to_unorm (values[i].g, 10); + b = pixman_float_to_unorm (values[i].b, 10); + WRITE (image, pixel++, - (values[i] & 0xff00ff00) | - ((values[i] >> 16) & 0xff) | - ((values[i] & 0xff) << 16)); + (b << 20) | (g << 10) | r); } } static void -store_scanline_x8b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) +store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; + uint32_t *pixel = bits + x; + argb_t *values = (argb_t *)v; int i; - + for (i = 0; i < width; ++i) { + uint8_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 8); + r = to_srgb (values[i].r); + g = to_srgb (values[i].g); + b = to_srgb (values[i].b); + WRITE (image, pixel++, - (values[i] & 0x0000ff00) | - ((values[i] >> 16) & 0xff) | - ((values[i] & 0xff) << 16)); - } -} - -static void -store_scanline_b8g8r8a8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t 
*pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 24) & 0x000000ff) | - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); - } -} - -static void -store_scanline_b8g8r8x8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); - } -} - -static void -store_scanline_x14r6g6b6 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = ((uint32_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = values[i]; - uint32_t r, g, b; - - r = (s & 0xfc0000) >> 6; - g = (s & 0x00fc00) >> 4; - b = (s & 0x0000fc) >> 2; - - WRITE (image, pixel++, r | g | b); - } -} - -static void -store_scanline_r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + 3 * x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t val = values[i]; - -#ifdef WORDS_BIGENDIAN - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x000000ff) >> 0); -#else - WRITE (image, pixel++, (val & 0x000000ff) >> 0); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); -#endif - } -} - -static void -store_scanline_b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + 3 * x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t val = values[i]; - -#ifdef WORDS_BIGENDIAN - WRITE (image, pixel++, (val & 0x000000ff) >> 0); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); -#else - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x000000ff) >> 0); -#endif - } -} - -static void -store_scanline_r5g6b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = values[i]; - - WRITE (image, pixel++, - ((s >> 3) & 0x001f) | - ((s >> 5) & 0x07e0) | - ((s >> 8) & 0xf800)); - } -} - -static void -store_scanline_b5g6r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b << 8) & 0xf800) | - ((g << 3) & 0x07e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_a1r5g5b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0x8000) | - ((r << 7) & 0x7c00) 
| - ((g << 2) & 0x03e0) | - ((b >> 3) )); - } -} - -static void -store_scanline_x1r5g5b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((b >> 3) )); - } -} - -static void -store_scanline_a1b5g5r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0x8000) | - ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_x1b5g5r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_a4r4g4b4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0xf000) | - ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); - } -} - -static void -store_scanline_x4r4g4b4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); - } -} - -static void -store_scanline_a4b4g4r4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - WRITE (image, pixel++, ((a << 8) & 0xf000) | - ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); - } -} - -static void -store_scanline_x4b4g4r4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); - } -} - -static void -store_scanline_a8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, values[i] >> 24); - } -} - -static void -store_scanline_r3g3b2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r ) & 0xe0) | - ((g >> 3) & 0x1c) | - ((b >> 6) )); - } -} - -static void -store_scanline_b2g3r3 (bits_image_t * 
image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b ) & 0xc0) | - ((g >> 2) & 0x38) | - ((r >> 5) )); - } -} - -static void -store_scanline_a2r2g2b2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a ) & 0xc0) | - ((r >> 2) & 0x30) | - ((g >> 4) & 0x0c) | - ((b >> 6) )); - } -} - -static void -store_scanline_a2b2g2r2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a ) & 0xc0) | - ((b >> 2) & 0x30) | - ((g >> 4) & 0x0c) | - ((r >> 6) )); - } -} - -static void -store_scanline_c8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, RGB24_TO_ENTRY (indexed,values[i])); -} - -static void -store_scanline_g8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, RGB24_TO_ENTRY_Y (indexed,values[i])); -} - -static void -store_scanline_x4a4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, values[i] >> 28); -} - -#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) -#ifdef WORDS_BIGENDIAN - -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ - } while (0) -#else - -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? 
\ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ - } while (0) -#endif - -static void -store_scanline_a4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - STORE_4 (image, bits, i + x, values[i] >> 28); -} - -static void -store_scanline_r1g2b1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT (values[i]); - pixel = (((r >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((b >> 7) )); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_b1g2r1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT (values[i]); - pixel = (((b >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((r >> 7) )); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1r1g1b1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT_A (values[i]); - pixel = (((a >> 4) & 0x8) | - ((r >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((b >> 7) )); - - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1b1g1r1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT_A (values[i]); - pixel = (((a >> 4) & 0x8) | - ((b >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((r >> 7) )); - - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_c4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - pixel = RGB24_TO_ENTRY (indexed, values[i]); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_g4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - pixel = RGB24_TO_ENTRY_Y (indexed, values[i]); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); - uint32_t mask, v; - -#ifdef WORDS_BIGENDIAN - mask = 1 << (0x1f - ((i + x) & 0x1f)); -#else - mask = 1 << ((i + x) & 0x1f); -#endif - v = values[i] & 0x80000000 ? 
mask : 0; - - WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); - } -} - -static void -store_scanline_g1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); - uint32_t mask, v; - -#ifdef WORDS_BIGENDIAN - mask = 1 << (0x1f - ((i + x) & 0x1f)); -#else - mask = 1 << ((i + x) & 0x1f); -#endif - v = RGB24_TO_ENTRY_Y (indexed, values[i]) & 0x1 ? mask : 0; - - WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); + (a << 24) | (r << 16) | (g << 8) | b); } } /* - * Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit - * store proc. Despite the type, this function expects a uint64_t buffer. + * Contracts a floating point image to 32bpp and then stores it using a + * regular 32-bit store proc. Despite the type, this function expects an + * argb_t buffer. */ static void -store_scanline_generic_64 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) +store_scanline_generic_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { uint32_t *argb8_pixels; - + assert (image->common.type == BITS); - + argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t)); if (!argb8_pixels) return; - + /* Contract the scanline. We could do this in place if values weren't * const. */ - pixman_contract (argb8_pixels, (uint64_t *)values, width); - + pixman_contract_from_float (argb8_pixels, (argb_t *)values, width); + image->store_scanline_32 (image, x, y, width, argb8_pixels); - + free (argb8_pixels); } -/* Despite the type, this function expects both buffer - * and mask to be uint64_t - */ static void -fetch_scanline_generic_64 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +fetch_scanline_generic_float (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask) { - pixman_format_code_t format; - - /* Fetch the pixels into the first half of buffer and then expand them in - * place. - */ image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL); - format = image->bits.format; - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR || - PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) - { - /* Indexed formats are mapped to a8r8g8b8 with full - * precision, so when expanding we shouldn't correct - * for the width of the channels - */ - - format = PIXMAN_a8r8g8b8; - } - - pixman_expand ((uint64_t *)buffer, buffer, format, width); + pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width); } -/* Despite the type, this function expects a uint64_t *buffer */ -static uint64_t -fetch_pixel_generic_64 (bits_image_t *image, - int offset, - int line) +/* The 32_sRGB paths should be deleted after narrow processing + * is no longer invoked for formats that are considered wide. 
+ * (Also see fetch_pixel_generic_lossy_32) */
+static void
+fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 uint32_t       *buffer,
+                                 const uint32_t *mask)
+{
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    uint32_t tmp;
+
+    while (pixel < end)
+    {
+        uint8_t a, r, g, b;
+
+        tmp = READ (image, pixel++);
+
+        a = (tmp >> 24) & 0xff;
+        r = (tmp >> 16) & 0xff;
+        g = (tmp >> 8) & 0xff;
+        b = (tmp >> 0) & 0xff;
+
+        r = to_linear[r] * 255.0f + 0.5f;
+        g = to_linear[g] * 255.0f + 0.5f;
+        b = to_linear[b] * 255.0f + 0.5f;
+
+        *buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0);
+    }
+}
+
+static uint32_t
+fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
+                              int           offset,
+                              int           line)
+{
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t tmp = READ (image, bits + offset);
+    uint8_t a, r, g, b;
+
+    a = (tmp >> 24) & 0xff;
+    r = (tmp >> 16) & 0xff;
+    g = (tmp >> 8) & 0xff;
+    b = (tmp >> 0) & 0xff;
+
+    r = to_linear[r] * 255.0f + 0.5f;
+    g = to_linear[g] * 255.0f + 0.5f;
+    b = to_linear[b] * 255.0f + 0.5f;
+
+    return (a << 24) | (r << 16) | (g << 8) | (b << 0);
+}
+
+static void
+store_scanline_a8r8g8b8_32_sRGB (bits_image_t   *image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 const uint32_t *v)
+{
+    uint32_t *bits = image->bits + image->rowstride * y;
+    uint32_t *pixel = bits + x;
+    uint32_t tmp;
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+        uint8_t a, r, g, b;
+
+        /* The buffer holds one a8r8g8b8 pixel per 32-bit word; the alpha
+         * byte is shifted back into bits 24-31 when repacking. */
+        tmp = v[i];
+
+        a = (tmp >> 24) & 0xff;
+        r = (tmp >> 16) & 0xff;
+        g = (tmp >> 8) & 0xff;
+        b = (tmp >> 0) & 0xff;
+
+        r = to_srgb (r * (1/255.0f));
+        g = to_srgb (g * (1/255.0f));
+        b = to_srgb (b * (1/255.0f));
+
+        WRITE (image, pixel++, (a << 24) | (r << 16) | (g << 8) | (b << 0));
+    }
+}
+
+static argb_t
+fetch_pixel_generic_float (bits_image_t *image,
+                           int           offset,
+                           int           line)
 {
     uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
-    uint64_t result;
-    pixman_format_code_t format;
+    argb_t f;
 
-    format = image->format;
-    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR ||
-        PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
-    {
-        /* Indexed formats are mapped to a8r8g8b8 with full
-         * precision, so when expanding we shouldn't correct
-         * for the width of the channels
-         */
-
-        format = PIXMAN_a8r8g8b8;
-    }
-
-    pixman_expand ((uint64_t *)&result, &pixel32, format, 1);
+    pixman_expand_to_float (&f, &pixel32, image->format, 1);
 
-    return result;
+    return f;
 }
 
 /*
@@ -2808,10 +1245,10 @@ fetch_pixel_generic_lossy_32 (bits_image_t *image,
                               int           offset,
                               int           line)
 {
-    uint64_t pixel64 = image->fetch_pixel_64 (image, offset, line);
+    argb_t pixel64 = image->fetch_pixel_float (image, offset, line);
     uint32_t result;
-
-    pixman_contract (&result, &pixel64, 1);
+
+    pixman_contract_from_float (&result, &pixel64, 1);
 
     return result;
 }
@@ -2820,20 +1257,22 @@ typedef struct
 {
     pixman_format_code_t format;
     fetch_scanline_t     fetch_scanline_32;
-    fetch_scanline_t     fetch_scanline_64;
+    fetch_scanline_t     fetch_scanline_float;
     fetch_pixel_32_t     fetch_pixel_32;
-    fetch_pixel_64_t     fetch_pixel_64;
+    fetch_pixel_float_t  fetch_pixel_float;
     store_scanline_t     store_scanline_32;
-    store_scanline_t     store_scanline_64;
+    store_scanline_t     store_scanline_float;
 } format_info_t;
 
 #define FORMAT_INFO(format) \
     { \
        PIXMAN_ ## format, \
        fetch_scanline_ ## format, \
-       fetch_scanline_generic_64, \
-       fetch_pixel_ ## format, fetch_pixel_generic_64, \
-       store_scanline_ ## format, 
store_scanline_generic_64 \ + fetch_scanline_generic_float, \ + fetch_pixel_ ## format, \ + fetch_pixel_generic_float, \ + store_scanline_ ## format, \ + store_scanline_generic_float \ } static const format_info_t accessors[] = @@ -2845,8 +1284,17 @@ static const format_info_t accessors[] = FORMAT_INFO (x8b8g8r8), FORMAT_INFO (b8g8r8a8), FORMAT_INFO (b8g8r8x8), + FORMAT_INFO (r8g8b8a8), + FORMAT_INFO (r8g8b8x8), FORMAT_INFO (x14r6g6b6), +/* sRGB formats */ + { PIXMAN_a8r8g8b8_sRGB, + fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float, + fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float, + store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float, + }, + /* 24bpp formats */ FORMAT_INFO (r8g8b8), FORMAT_INFO (b8g8r8), @@ -2873,8 +1321,6 @@ static const format_info_t accessors[] = FORMAT_INFO (c8), -#define fetch_scanline_g8 fetch_scanline_c8 -#define fetch_pixel_g8 fetch_pixel_c8 FORMAT_INFO (g8), #define fetch_scanline_x4c4 fetch_scanline_c8 @@ -2882,8 +1328,8 @@ static const format_info_t accessors[] = #define store_scanline_x4c4 store_scanline_c8 FORMAT_INFO (x4c4), -#define fetch_scanline_x4g4 fetch_scanline_c8 -#define fetch_pixel_x4g4 fetch_pixel_c8 +#define fetch_scanline_x4g4 fetch_scanline_g8 +#define fetch_pixel_x4g4 fetch_pixel_g8 #define store_scanline_x4g4 store_scanline_g8 FORMAT_INFO (x4g4), @@ -2898,8 +1344,6 @@ static const format_info_t accessors[] = FORMAT_INFO (c4), -#define fetch_scanline_g4 fetch_scanline_c4 -#define fetch_pixel_g4 fetch_pixel_c4 FORMAT_INFO (g4), /* 1bpp formats */ @@ -2909,34 +1353,34 @@ static const format_info_t accessors[] = /* Wide formats */ { PIXMAN_a2r10g10b10, - NULL, fetch_scanline_a2r10g10b10, - fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10, - NULL, store_scanline_a2r10g10b10 }, - + NULL, fetch_scanline_a2r10g10b10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float, + NULL, store_scanline_a2r10g10b10_float }, + { PIXMAN_x2r10g10b10, - NULL, fetch_scanline_x2r10g10b10, - fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10, - NULL, store_scanline_x2r10g10b10 }, - + NULL, fetch_scanline_x2r10g10b10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float, + NULL, store_scanline_x2r10g10b10_float }, + { PIXMAN_a2b10g10r10, - NULL, fetch_scanline_a2b10g10r10, - fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10, - NULL, store_scanline_a2b10g10r10 }, - + NULL, fetch_scanline_a2b10g10r10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float, + NULL, store_scanline_a2b10g10r10_float }, + { PIXMAN_x2b10g10r10, - NULL, fetch_scanline_x2b10g10r10, - fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10, - NULL, store_scanline_x2b10g10r10 }, - + NULL, fetch_scanline_x2b10g10r10_float, + fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float, + NULL, store_scanline_x2b10g10r10_float }, + /* YUV formats */ { PIXMAN_yuy2, - fetch_scanline_yuy2, fetch_scanline_generic_64, - fetch_pixel_yuy2, fetch_pixel_generic_64, + fetch_scanline_yuy2, fetch_scanline_generic_float, + fetch_pixel_yuy2, fetch_pixel_generic_float, NULL, NULL }, - + { PIXMAN_yv12, - fetch_scanline_yv12, fetch_scanline_generic_64, - fetch_pixel_yv12, fetch_pixel_generic_64, + fetch_scanline_yv12, fetch_scanline_generic_float, + fetch_pixel_yv12, fetch_pixel_generic_float, NULL, NULL }, { PIXMAN_null }, @@ -2952,11 +1396,11 @@ setup_accessors (bits_image_t *image) if (info->format == image->format) { image->fetch_scanline_32 = info->fetch_scanline_32; - image->fetch_scanline_64 = 
info->fetch_scanline_64; + image->fetch_scanline_float = info->fetch_scanline_float; image->fetch_pixel_32 = info->fetch_pixel_32; - image->fetch_pixel_64 = info->fetch_pixel_64; + image->fetch_pixel_float = info->fetch_pixel_float; image->store_scanline_32 = info->store_scanline_32; - image->store_scanline_64 = info->store_scanline_64; + image->store_scanline_float = info->store_scanline_float; return; } diff --git a/programs/develop/libraries/pixman/pixman-accessor.h b/programs/develop/libraries/pixman/pixman-accessor.h index 90c8ea7b77..8e0b03621b 100644 --- a/programs/develop/libraries/pixman/pixman-accessor.h +++ b/programs/develop/libraries/pixman/pixman-accessor.h @@ -1,21 +1,10 @@ #ifdef PIXMAN_FB_ACCESSORS -#define ACCESS(sym) sym##_accessors - #define READ(img, ptr) \ (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr)))) #define WRITE(img, ptr,val) \ (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr)))) -#define MEMCPY_WRAPPED(img, dst, src, size) \ - do { \ - size_t _i; \ - uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src); \ - for(_i = 0; _i < size; _i++) { \ - WRITE((img), _dst +_i, READ((img), _src + _i)); \ - } \ - } while (0) - #define MEMSET_WRAPPED(img, dst, val, size) \ do { \ size_t _i; \ @@ -27,12 +16,8 @@ #else -#define ACCESS(sym) sym - #define READ(img, ptr) (*(ptr)) #define WRITE(img, ptr, val) (*(ptr) = (val)) -#define MEMCPY_WRAPPED(img, dst, src, size) \ - memcpy(dst, src, size) #define MEMSET_WRAPPED(img, dst, val, size) \ memset(dst, val, size) diff --git a/programs/develop/libraries/pixman/pixman-bits-image.c b/programs/develop/libraries/pixman/pixman-bits-image.c index b27a732848..75a39a1159 100644 --- a/programs/develop/libraries/pixman/pixman-bits-image.c +++ b/programs/develop/libraries/pixman/pixman-bits-image.c @@ -34,44 +34,20 @@ #include #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" -/* Store functions */ -void -_pixman_image_store_scanline_32 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *buffer) +static uint32_t * +_pixman_image_get_scanline_generic_float (pixman_iter_t * iter, + const uint32_t *mask) { - image->store_scanline_32 (image, x, y, width, buffer); + pixman_iter_get_scanline_t fetch_32 = iter->data; + uint32_t *buffer = iter->buffer; - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; + fetch_32 (iter, NULL); - image->common.alpha_map->store_scanline_32 ( - image->common.alpha_map, x, y, width, buffer); - } -} + pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); -void -_pixman_image_store_scanline_64 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *buffer) -{ - image->store_scanline_64 (image, x, y, width, buffer); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->store_scanline_64 ( - image->common.alpha_map, x, y, width, buffer); - } + return iter->buffer; } /* Fetch functions */ @@ -92,34 +68,6 @@ fetch_pixel_no_alpha (bits_image_t *image, typedef uint32_t (* get_pixel_t) (bits_image_t *image, int x, int y, pixman_bool_t check_bounds); -static force_inline void -repeat (pixman_repeat_t repeat, int size, int *coord) -{ - switch (repeat) - { - case PIXMAN_REPEAT_NORMAL: - *coord = MOD (*coord, size); - break; - - case PIXMAN_REPEAT_PAD: - *coord = CLIP (*coord, 0, size - 1); - break; - - case PIXMAN_REPEAT_REFLECT: - *coord = MOD (*coord, size * 
2); - - if (*coord >= size) - *coord = size * 2 - *coord - 1; - break; - - case PIXMAN_REPEAT_NONE: - break; - - default: - break; - } -} - static force_inline uint32_t bits_image_fetch_pixel_nearest (bits_image_t *image, pixman_fixed_t x, @@ -131,8 +79,8 @@ bits_image_fetch_pixel_nearest (bits_image_t *image, if (image->common.repeat != PIXMAN_REPEAT_NONE) { - repeat (image->common.repeat, image->width, &x0); - repeat (image->common.repeat, image->height, &y0); + repeat (image->common.repeat, &x0, image->width); + repeat (image->common.repeat, &y0, image->height); return get_pixel (image, x0, y0, FALSE); } @@ -142,97 +90,6 @@ bits_image_fetch_pixel_nearest (bits_image_t *image, } } -#if SIZEOF_LONG > 4 - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - uint64_t distxy, distxiy, distixy, distixiy; - uint64_t tl64, tr64, bl64, br64; - uint64_t f, r; - - distxy = distx * disty; - distxiy = distx * (256 - disty); - distixy = (256 - distx) * disty; - distixiy = (256 - distx) * (256 - disty); - - /* Alpha and Blue */ - tl64 = tl & 0xff0000ff; - tr64 = tr & 0xff0000ff; - bl64 = bl & 0xff0000ff; - br64 = br & 0xff0000ff; - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r = f & 0x0000ff0000ff0000ull; - - /* Red and Green */ - tl64 = tl; - tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); - - tr64 = tr; - tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); - - bl64 = bl; - bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); - - br64 = br; - br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); - - return (uint32_t)(r >> 16); -} - -#else - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - int distxy, distxiy, distixy, distixiy; - uint32_t f, r; - - distxy = distx * disty; - distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ - distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ - distixiy = - 256 * 256 - (disty << 8) - - (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ - - /* Blue */ - r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - - /* Green */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - tl >>= 16; - tr >>= 16; - bl >>= 16; - br >>= 16; - r >>= 16; - - /* Red */ - f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - r |= f & 0x00ff0000; - - /* Alpha */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - return r; -} - -#endif - static force_inline uint32_t bits_image_fetch_pixel_bilinear (bits_image_t *image, pixman_fixed_t x, @@ -249,8 +106,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); x1 = pixman_fixed_to_int (x1); y1 = pixman_fixed_to_int (y1); @@ -259,10 +116,10 @@ 
bits_image_fetch_pixel_bilinear (bits_image_t *image, if (repeat_mode != PIXMAN_REPEAT_NONE) { - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); tl = get_pixel (image, x1, y1, FALSE); bl = get_pixel (image, x1, y2, FALSE); @@ -280,14 +137,17 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, return bilinear_interpolation (tl, tr, bl, br, distx, disty); } -static void -bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask) +static uint32_t * +bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, + const uint32_t *mask) { + + pixman_image_t * ima = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + bits_image_t *bits = &ima->bits; pixman_fixed_t x_top, x_bottom, x; pixman_fixed_t ux_top, ux_bottom, ux; @@ -309,13 +169,13 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, v.vector[2] = pixman_fixed_1; if (!pixman_transform_point_3d (bits->common.transform, &v)) - return; + return iter->buffer; ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; y = v.vector[1] - pixman_fixed_1/2; - disty = (y >> 8) & 0xff; + disty = pixman_fixed_to_bilinear_weight (y); /* Load the pointers to the first and second lines from the source * image that bilinear code must read. @@ -376,7 +236,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, if (top_row == zero && bottom_row == zero) { memset (buffer, 0, width * sizeof (uint32_t)); - return; + return iter->buffer; } else if (bits->format == PIXMAN_x8r8g8b8) { @@ -424,7 +284,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); @@ -449,7 +309,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); } @@ -473,7 +333,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); } @@ -488,6 +348,8 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, /* Zero fill to the left of the image */ while (buffer < end) *buffer++ = 0; + + return iter->buffer; } static force_inline uint32_t @@ -501,11 +363,11 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, int y_off = (params[1] - pixman_fixed_1) >> 1; int32_t cwidth = pixman_fixed_to_int (params[0]); int32_t cheight = pixman_fixed_to_int (params[1]); - int32_t srtot, sgtot, sbtot, satot; int32_t i, j, x1, x2, y1, y2; pixman_repeat_t repeat_mode = image->common.repeat; int 
width = image->width; int height = image->height; + int srtot, sgtot, sbtot, satot; params += 2; @@ -531,8 +393,8 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, if (repeat_mode != PIXMAN_REPEAT_NONE) { - repeat (repeat_mode, width, &rx); - repeat (repeat_mode, height, &ry); + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); pixel = get_pixel (image, rx, ry, FALSE); } @@ -541,20 +403,118 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, pixel = get_pixel (image, rx, ry, TRUE); } - srtot += RED_8 (pixel) * f; - sgtot += GREEN_8 (pixel) * f; - sbtot += BLUE_8 (pixel) * f; - satot += ALPHA_8 (pixel) * f; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; } params++; } } - satot >>= 16; - srtot >>= 16; - sgtot >>= 16; - sbtot >>= 16; + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static uint32_t +bits_image_fetch_pixel_separable_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_fixed_t *params = image->common.filter_params; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + pixman_fixed_t *y_params; + int srtot, sgtot, sbtot, satot; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). 
+ */ + x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_48_16_t fy = *y_params++; + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + if (fy) + { + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + f = (fy * fx + 0x8000) >> 16; + + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; satot = CLIP (satot, 0, 0xff); srtot = CLIP (srtot, 0, 0xff); @@ -587,6 +547,10 @@ bits_image_fetch_pixel_filtered (bits_image_t *image, return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel); + break; + default: break; } @@ -594,14 +558,16 @@ bits_image_fetch_pixel_filtered (bits_image_t *image, return 0; } -static void -bits_image_fetch_affine_no_alpha (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, +static uint32_t * +bits_image_fetch_affine_no_alpha (pixman_iter_t * iter, const uint32_t * mask) { + pixman_image_t *image = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + pixman_fixed_t x, y; pixman_fixed_t ux, uy; pixman_vector_t v; @@ -615,7 +581,7 @@ bits_image_fetch_affine_no_alpha (pixman_image_t * image, if (image->common.transform) { if (!pixman_transform_point_3d (image->common.transform, &v)) - return; + return iter->buffer; ux = image->common.transform->matrix[0][0]; uy = image->common.transform->matrix[1][0]; @@ -640,6 +606,8 @@ bits_image_fetch_affine_no_alpha (pixman_image_t * image, x += ux; y += uy; } + + return buffer; } /* General fetcher */ @@ -683,14 +651,16 @@ fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_boun return pixel; } -static void -bits_image_fetch_general (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask) +static uint32_t * +bits_image_fetch_general (pixman_iter_t *iter, + const uint32_t *mask) { + pixman_image_t *image = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + pixman_fixed_t x, y, w; pixman_fixed_t ux, uy, uw; pixman_vector_t v; @@ -704,7 +674,7 @@ bits_image_fetch_general (pixman_image_t * image, if (image->common.transform) { if (!pixman_transform_point_3d (image->common.transform, &v)) - return; + return buffer; ux = image->common.transform->matrix[0][0]; uy = 
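The pointer arithmetic above implies a specific layout for the separable-convolution filter parameter block. A pair of hypothetical helpers making that layout explicit (editorial sketch, not part of the patch; in pixman the block is produced by pixman_filter_create_separable_convolution):

```c
/* Layout implied by the indexing above (all entries are pixman_fixed_t):
 *   params[0], params[1]  - matrix width and height
 *   params[2], params[3]  - x and y phase bits
 *   params + 4            - x matrices, one per x phase, cwidth entries each
 *   after the x matrices  - y matrices, one per y phase, cheight entries each
 */
static pixman_fixed_t *
x_matrix (pixman_fixed_t *params, int px)   /* hypothetical helper */
{
    int cwidth = pixman_fixed_to_int (params[0]);

    return params + 4 + px * cwidth;
}

static pixman_fixed_t *
y_matrix (pixman_fixed_t *params, int py)   /* hypothetical helper */
{
    int cwidth       = pixman_fixed_to_int (params[0]);
    int cheight      = pixman_fixed_to_int (params[1]);
    int x_phase_bits = pixman_fixed_to_int (params[2]);

    /* the y block starts after all (1 << x_phase_bits) x matrices */
    return params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
}
```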
image->common.transform->matrix[1][0]; @@ -746,12 +716,158 @@ bits_image_fetch_general (pixman_image_t * image, y += uy; w += uw; } + + return buffer; +} + +typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); + +static force_inline void +bits_image_fetch_separable_convolution_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + bits_image_t *bits = &image->bits; + pixman_fixed_t *params = image->common.filter_params; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + pixman_fixed_t vx, vy; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int k; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + vx = v.vector[0]; + vy = v.vector[1]; + + for (k = 0; k < width; ++k) + { + pixman_fixed_t *y_params; + int satot, srtot, sgtot, sbtot; + pixman_fixed_t x, y; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + if (mask && !mask[k]) + goto next; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). + */ + x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + satot = srtot = sgtot = sbtot = 0; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_t fy = *y_params++; + + if (fy) + { + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel, mask; + uint8_t *row; + + mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, bits->width); + repeat (repeat_mode, &ry, bits->height); + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + else + { + if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) + { + pixel = 0; + } + else + { + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + } + + f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); + + next: + vx += ux; + vy += uy; + } } static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - static force_inline void bits_image_fetch_bilinear_affine (pixman_image_t * image, int offset, @@ -800,8 +916,8 @@ bits_image_fetch_bilinear_affine (pixman_image_t * image, x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); y1 = pixman_fixed_to_int (y1); y2 = y1 + 1; @@ -814,10 +930,10 @@ bits_image_fetch_bilinear_affine (pixman_image_t * image, mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; @@ -906,6 +1022,77 @@ bits_image_fetch_bilinear_affine (pixman_image_t * image, } } +static force_inline void +bits_image_fetch_nearest_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int width, height, x0, y0; + const uint8_t *row; + + if (mask && !mask[i]) + goto next; + + width = image->bits.width; + height = image->bits.height; + x0 = pixman_fixed_to_int (x - pixman_fixed_e); + y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (repeat_mode == PIXMAN_REPEAT_NONE && + (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) + { + buffer[i] = 0; + } + else + { + uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); + } + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; + + buffer[i] = convert_pixel (row, x0) | mask; + } + + next: + x += ux; + y += uy; + } +} + static force_inline uint32_t convert_a8r8g8b8 (const uint8_t *row, int x) { @@ -927,54 +1114,89 @@ convert_a8 (const uint8_t *row, int x) static force_inline uint32_t convert_r5g6b5 (const uint8_t *row, int x) { - return CONVERT_0565_TO_0888 (*((uint16_t *)row + x)); + return convert_0565_to_0888 (*((uint16_t *)row + x)); } +#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_separable_convolution_affine ( \ + iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + \ + return iter->buffer; \ + } + #define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ - static void \ - bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \ - int offset, \ - int line, \ - int width, \ - uint32_t * buffer, \ + static uint32_t * \ + bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ const uint32_t * mask) \ { \ - bits_image_fetch_bilinear_affine (image, offset, line, width, buffer, mask, \ + bits_image_fetch_bilinear_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ convert_ ## format, \ PIXMAN_ ## format, \ repeat_mode); \ - } \ - extern int no_such_variable + return iter->buffer; \ + } -MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD); -MAKE_BILINEAR_FETCHER (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE); -MAKE_BILINEAR_FETCHER (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT); -MAKE_BILINEAR_FETCHER (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL); -MAKE_BILINEAR_FETCHER (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD); -MAKE_BILINEAR_FETCHER (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE); -MAKE_BILINEAR_FETCHER (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT); -MAKE_BILINEAR_FETCHER (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL); -MAKE_BILINEAR_FETCHER (pad_a8, a8, PIXMAN_REPEAT_PAD); -MAKE_BILINEAR_FETCHER (none_a8, a8, PIXMAN_REPEAT_NONE); -MAKE_BILINEAR_FETCHER (reflect_a8, a8, PIXMAN_REPEAT_REFLECT); -MAKE_BILINEAR_FETCHER (normal_a8, a8, PIXMAN_REPEAT_NORMAL); -MAKE_BILINEAR_FETCHER (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD); -MAKE_BILINEAR_FETCHER (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE); -MAKE_BILINEAR_FETCHER (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT); -MAKE_BILINEAR_FETCHER (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL); +#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_nearest_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + } + +#define MAKE_FETCHERS(name, format, repeat_mode) \ + MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ + MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) + +MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS 
(reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) static void -bits_image_fetch_solid_32 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) +replicate_pixel_32 (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * buffer) { uint32_t color; uint32_t *end; - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + color = bits->fetch_pixel_32 (bits, x, y); end = buffer + width; while (buffer < end) @@ -982,18 +1204,17 @@ bits_image_fetch_solid_32 (pixman_image_t * image, } static void -bits_image_fetch_solid_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t * unused) +replicate_pixel_float (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * b) { - uint64_t color; - uint64_t *buffer = (uint64_t *)b; - uint64_t *end; + argb_t color; + argb_t *buffer = (argb_t *)b; + argb_t *end; - color = image->bits.fetch_pixel_64 (&image->bits, 0, 0); + color = bits->fetch_pixel_float (bits, x, y); end = buffer + width; while (buffer < end) @@ -1012,7 +1233,7 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, if (y < 0 || y >= image->height) { - memset (buffer, 0, width * (wide? 8 : 4)); + memset (buffer, 0, width * (wide? sizeof (argb_t) : 4)); return; } @@ -1020,10 +1241,10 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, { w = MIN (width, -x); - memset (buffer, 0, w * (wide ? 8 : 4)); + memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4)); width -= w; - buffer += w * (wide? 2 : 1); + buffer += w * (wide? 4 : 1); x += w; } @@ -1032,16 +1253,16 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); else image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); width -= w; - buffer += w * (wide? 2 : 1); + buffer += w * (wide? 4 : 1); x += w; } - memset (buffer, 0, width * (wide ? 8 : 4)); + memset (buffer, 0, width * (wide ? 
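Each MAKE_FETCHERS invocation above instantiates three scanline fetchers, one per filter. For example, MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) expands to the following (only the nearest variant is shown in full; the bilinear and separable-convolution variants are analogous):

```c
/* Mechanical expansion of MAKE_NEAREST_FETCHER (pad_a8, a8, PIXMAN_REPEAT_PAD) */
static uint32_t *
bits_image_fetch_nearest_affine_pad_a8 (pixman_iter_t *iter,
                                        const uint32_t *mask)
{
    bits_image_fetch_nearest_affine (iter->image,
                                     iter->x, iter->y++,
                                     iter->width,
                                     iter->buffer, mask,
                                     convert_a8,
                                     PIXMAN_a8,
                                     PIXMAN_REPEAT_PAD);
    return iter->buffer;
}
```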
sizeof (argb_t) : 4)); } static void @@ -1060,6 +1281,16 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, while (y >= image->height) y -= image->height; + if (image->width == 1) + { + if (wide) + replicate_pixel_float (image, 0, y, width, buffer); + else + replicate_pixel_32 (image, 0, y, width, buffer); + + return; + } + while (width) { while (x < 0) @@ -1070,24 +1301,26 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); else image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); - buffer += w * (wide? 2 : 1); + buffer += w * (wide? 4 : 1); x += w; width -= w; } } -static void -bits_image_fetch_untransformed_32 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) +static uint32_t * +bits_image_fetch_untransformed_32 (pixman_iter_t * iter, + const uint32_t *mask) { + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + if (image->common.repeat == PIXMAN_REPEAT_NONE) { bits_image_fetch_untransformed_repeat_none ( @@ -1098,16 +1331,21 @@ bits_image_fetch_untransformed_32 (pixman_image_t * image, bits_image_fetch_untransformed_repeat_normal ( &image->bits, FALSE, x, y, width, buffer); } + + iter->y++; + return buffer; } -static void -bits_image_fetch_untransformed_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * unused) +static uint32_t * +bits_image_fetch_untransformed_float (pixman_iter_t * iter, + const uint32_t *mask) { + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + if (image->common.repeat == PIXMAN_REPEAT_NONE) { bits_image_fetch_untransformed_repeat_none ( @@ -1118,24 +1356,21 @@ bits_image_fetch_untransformed_64 (pixman_image_t * image, bits_image_fetch_untransformed_repeat_normal ( &image->bits, TRUE, x, y, width, buffer); } + + iter->y++; + return buffer; } typedef struct { pixman_format_code_t format; uint32_t flags; - fetch_scanline_t fetch_32; - fetch_scanline_t fetch_64; + pixman_iter_get_scanline_t get_scanline_32; + pixman_iter_get_scanline_t get_scanline_float; } fetcher_info_t; static const fetcher_info_t fetcher_info[] = { - { PIXMAN_solid, - FAST_PATH_NO_ALPHA_MAP, - bits_image_fetch_solid_32, - bits_image_fetch_solid_64 - }, - { PIXMAN_any, (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_ID_TRANSFORM | @@ -1143,7 +1378,7 @@ static const fetcher_info_t fetcher_info[] = FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_REFLECT_REPEAT), bits_image_fetch_untransformed_32, - bits_image_fetch_untransformed_64 + bits_image_fetch_untransformed_float }, #define FAST_BILINEAR_FLAGS \ @@ -1159,13 +1394,13 @@ static const fetcher_info_t fetcher_info[] = { PIXMAN_a8r8g8b8, FAST_BILINEAR_FLAGS, bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_64 + _pixman_image_get_scanline_generic_float }, { PIXMAN_x8r8g8b8, FAST_BILINEAR_FLAGS, bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_64 + _pixman_image_get_scanline_generic_float }, #define GENERAL_BILINEAR_FLAGS \ @@ -1175,39 +1410,76 @@ static const fetcher_info_t fetcher_info[] = FAST_PATH_AFFINE_TRANSFORM | \ FAST_PATH_BILINEAR_FILTER) +#define GENERAL_NEAREST_FLAGS \ + 
(FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_NEAREST_FILTER) + +#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) + +#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + bits_image_fetch_separable_convolution_affine_ ## name, \ + _pixman_image_get_scanline_generic_float \ + }, + #define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ { PIXMAN_ ## format, \ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ bits_image_fetch_bilinear_affine_ ## name, \ - _pixman_image_get_scanline_generic_64 \ + _pixman_image_get_scanline_generic_float \ }, - BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD) - BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE) - BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT) - BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL) - BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD) - BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE) - BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT) - BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL) - BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD) - BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE) - BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT) - BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL) - BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD) - BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE) - BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT) - BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL) +#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + bits_image_fetch_nearest_affine_ ## name, \ + _pixman_image_get_scanline_generic_float \ + }, + +#define AFFINE_FAST_PATHS(name, format, repeat) \ + SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + NEAREST_AFFINE_FAST_PATH(name, format, repeat) + + AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_a8, a8, PAD) + AFFINE_FAST_PATHS (none_a8, a8, NONE) + AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) + AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) + AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) + AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) + AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) + AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) /* Affine, no alpha */ { PIXMAN_any, (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), bits_image_fetch_affine_no_alpha, - _pixman_image_get_scanline_generic_64 + _pixman_image_get_scanline_generic_float }, /* General */ - { PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 }, + { PIXMAN_any, + 0, + bits_image_fetch_general, + _pixman_image_get_scanline_generic_float + }, { 
PIXMAN_null }, }; @@ -1215,24 +1487,179 @@ static const fetcher_info_t fetcher_info[] = static void bits_image_property_changed (pixman_image_t *image) { - uint32_t flags = image->common.flags; + _pixman_bits_image_setup_accessors (&image->bits); +} + +void +_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ pixman_format_code_t format = image->common.extended_format_code; + uint32_t flags = image->common.flags; const fetcher_info_t *info; - _pixman_bits_image_setup_accessors (&image->bits); - - info = fetcher_info; - while (info->format != PIXMAN_null) + for (info = fetcher_info; info->format != PIXMAN_null; ++info) { if ((info->format == format || info->format == PIXMAN_any) && (info->flags & flags) == info->flags) { - image->common.get_scanline_32 = info->fetch_32; - image->common.get_scanline_64 = info->fetch_64; - break; + if (iter->iter_flags & ITER_NARROW) + { + iter->get_scanline = info->get_scanline_32; + } + else + { + iter->data = info->get_scanline_32; + iter->get_scanline = info->get_scanline_float; + } + return; } + } - info++; + /* Just in case we somehow didn't find a scanline function */ + iter->get_scanline = _pixman_iter_get_scanline_noop; +} + +static uint32_t * +dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); + if (image->common.alpha_map) + { + uint32_t *alpha; + + if ((alpha = malloc (width * sizeof (uint32_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_32 ( + (pixman_image_t *)image->common.alpha_map, + x, y, width, alpha, mask); + + for (i = 0; i < width; ++i) + { + buffer[i] &= ~0xff000000; + buffer[i] |= (alpha[i] & 0xff000000); + } + + free (alpha); + } + } + + return iter->buffer; +} + +static uint32_t * +dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + argb_t * buffer = (argb_t *)iter->buffer; + + image->fetch_scanline_float ( + (pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask); + if (image->common.alpha_map) + { + argb_t *alpha; + + if ((alpha = malloc (width * sizeof (argb_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_float ( + (pixman_image_t *)image->common.alpha_map, + x, y, width, (uint32_t *)alpha, mask); + + for (i = 0; i < width; ++i) + buffer[i].a = alpha[i].a; + + free (alpha); + } + } + + return iter->buffer; +} + +static void +dest_write_back_narrow (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_32 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_32 ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +static void +dest_write_back_wide (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_float (image, x, y, 
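The wide paths above operate on argb_t pixels, four floats with alpha first, which is why the buffer bookkeeping advances by four uint32_t-sized units per pixel. A minimal sketch of the per-pixel expansion the float pipeline performs for PIXMAN_a8r8g8b8 (the helper name expand_pixel is hypothetical; the patch itself uses pixman's pixman_expand_to_float on whole scanlines):

```c
#include <stdint.h>

typedef struct { float a, r, g, b; } argb_t;

/* Hypothetical scalar version of the 8-bit-to-float expansion:
 * each 8-bit channel maps to channel / 255.0f. */
static argb_t
expand_pixel (uint32_t p)
{
    argb_t f;

    f.a = ((p >> 24) & 0xff) / 255.0f;
    f.r = ((p >> 16) & 0xff) / 255.0f;
    f.g = ((p >>  8) & 0xff) / 255.0f;
    f.b = ((p >>  0) & 0xff) / 255.0f;

    return f;
}
```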
width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_float ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +void +_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + { + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else + { + iter->get_scanline = dest_get_scanline_narrow; + } + + iter->write_back = dest_write_back_narrow; + } + else + { + iter->get_scanline = dest_get_scanline_wide; + iter->write_back = dest_write_back_wide; } } @@ -1240,10 +1667,11 @@ static uint32_t * create_bits (pixman_format_code_t format, int width, int height, - int * rowstride_bytes) + int * rowstride_bytes, + pixman_bool_t clear) { int stride; - int buf_size; + size_t buf_size; int bpp; /* what follows is a long-winded way, avoiding any possibility of integer @@ -1252,11 +1680,11 @@ create_bits (pixman_format_code_t format, */ bpp = PIXMAN_FORMAT_BPP (format); - if (pixman_multiply_overflows_int (width, bpp)) + if (_pixman_multiply_overflows_int (width, bpp)) return NULL; stride = width * bpp; - if (pixman_addition_overflows_int (stride, 0x1f)) + if (_pixman_addition_overflows_int (stride, 0x1f)) return NULL; stride += 0x1f; @@ -1264,7 +1692,7 @@ create_bits (pixman_format_code_t format, stride *= sizeof (uint32_t); - if (pixman_multiply_overflows_int (height, stride)) + if (_pixman_multiply_overflows_size (height, stride)) return NULL; buf_size = height * stride; @@ -1272,42 +1700,36 @@ create_bits (pixman_format_code_t format, if (rowstride_bytes) *rowstride_bytes = stride; - return calloc (buf_size, 1); + if (clear) + return calloc (buf_size, 1); + else + return malloc (buf_size); } -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes) +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride, + pixman_bool_t clear) { - pixman_image_t *image; uint32_t *free_me = NULL; - /* must be a whole number of uint32_t's - */ - return_val_if_fail ( - bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); - - return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); - if (!bits && width && height) { - free_me = bits = create_bits (format, width, height, &rowstride_bytes); + int rowstride_bytes; + + free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear); + if (!bits) - return NULL; + return FALSE; + + rowstride = rowstride_bytes / (int) sizeof (uint32_t); } - image = _pixman_image_allocate (); - - if (!image) - { - if (free_me) - free (free_me); - - return NULL; - } + _pixman_image_init (image); image->type = BITS; image->bits.format = format; @@ -1317,15 +1739,70 @@ pixman_image_create_bits (pixman_format_code_t format, image->bits.free_me = free_me; image->bits.read_func = NULL; image->bits.write_func = NULL; - - /* The rowstride is stored in number of uint32_t */ - image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); - + image->bits.rowstride = rowstride; image->bits.indexed = NULL; image->common.property_changed = bits_image_property_changed; _pixman_image_reset_clip_region (image); + return TRUE; +} + +static 
pixman_image_t * +create_bits_image_internal (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes, + pixman_bool_t clear) +{ + pixman_image_t *image; + + /* must be a whole number of uint32_t's + */ + return_val_if_fail ( + bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); + + return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); + + image = _pixman_image_allocate (); + + if (!image) + return NULL; + + if (!_pixman_bits_image_init (image, format, width, height, bits, + rowstride_bytes / (int) sizeof (uint32_t), + clear)) + { + free (image); + return NULL; + } + return image; } + +/* If bits is NULL, a buffer will be allocated and initialized to 0 */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + return create_bits_image_internal ( + format, width, height, bits, rowstride_bytes, TRUE); +} + + +/* If bits is NULL, a buffer will be allocated and _not_ initialized */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits_no_clear (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + return create_bits_image_internal ( + format, width, height, bits, rowstride_bytes, FALSE); +} diff --git a/programs/develop/libraries/pixman/pixman-combine-float.c b/programs/develop/libraries/pixman/pixman-combine-float.c new file mode 100644 index 0000000000..5ea739f766 --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-combine-float.c @@ -0,0 +1,1016 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2010, 2012 Soren Sandmann Pedersen + * Copyright © 2010, 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann Pedersen (sandmann@cs.au.dk) + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <float.h> + +#include "pixman-private.h" + +/* Workaround for http://gcc.gnu.org/PR54965 */ +/* GCC 4.6 has problems with force_inline, so just use normal inline instead */ +#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6) +#undef force_inline +#define force_inline __inline__ +#endif + +typedef float (* combine_channel_t) (float sa, float s, float da, float d); + +static force_inline void +combine_inner (pixman_bool_t component, + float *dest, const float *src, const float *mask, int n_pixels, + combine_channel_t combine_a, combine_channel_t combine_c) +{ + int i; + + if (!mask) + { + for (i = 0; i < 4 * n_pixels; i += 4) + { + float sa = src[i + 0]; + float sr = src[i + 1]; + float sg = src[i + 2]; + float sb = src[i + 3]; + + float da = dest[i + 0]; + float dr = dest[i + 1]; + float dg = dest[i + 2]; + float db = dest[i + 3]; + + dest[i + 0] = combine_a (sa, sa, da, da); + dest[i + 1] = combine_c (sa, sr, da, dr); + dest[i + 2] = combine_c (sa, sg, da, dg); + dest[i + 3] = combine_c (sa, sb, da, db); + } + } + else + { + for (i = 0; i < 4 * n_pixels; i += 4) + { + float sa, sr, sg, sb; + float ma, mr, mg, mb; + float da, dr, dg, db; + + sa = src[i + 0]; + sr = src[i + 1]; + sg = src[i + 2]; + sb = src[i + 3]; + + if (component) + { + ma = mask[i + 0]; + mr = mask[i + 1]; + mg = mask[i + 2]; + mb = mask[i + 3]; + + sr *= mr; + sg *= mg; + sb *= mb; + + ma *= sa; + mr *= sa; + mg *= sa; + mb *= sa; + + sa = ma; + } + else + { + ma = mask[i + 0]; + + sa *= ma; + sr *= ma; + sg *= ma; + sb *= ma; + + ma = mr = mg = mb = sa; + } + + da = dest[i + 0]; + dr = dest[i + 1]; + dg = dest[i + 2]; + db = dest[i + 3]; + + dest[i + 0] = combine_a (ma, sa, da, da); + dest[i + 1] = combine_c (mr, sr, da, dr); + dest[i + 2] = combine_c (mg, sg, da, dg); + dest[i + 3] = combine_c (mb, sb, da, db); + } + } +} + +#define MAKE_COMBINER(name, component, combine_a, combine_c) \ + static void \ + combine_ ## name ## _float (pixman_implementation_t *imp, \ + pixman_op_t op, \ + float *dest, \ + const float *src, \ + const float *mask, \ + int n_pixels) \ + { \ + combine_inner (component, dest, src, mask, n_pixels, \ + combine_a, combine_c); \ + } + +#define MAKE_COMBINERS(name, combine_a, combine_c) \ + MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c) \ + MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c) + + +/* + * Porter/Duff operators + */ +typedef enum +{ + ZERO, + ONE, + SRC_ALPHA, + DEST_ALPHA, + INV_SA, + INV_DA, + SA_OVER_DA, + DA_OVER_SA, + INV_SA_OVER_DA, + INV_DA_OVER_SA, + ONE_MINUS_SA_OVER_DA, + ONE_MINUS_DA_OVER_SA, + ONE_MINUS_INV_DA_OVER_SA, + ONE_MINUS_INV_SA_OVER_DA +} combine_factor_t; + +#define CLAMP(f) \ + (((f) < 0)? 0 : (((f) > 1.0) ?
1.0 : (f))) + +static force_inline float +get_factor (combine_factor_t factor, float sa, float da) +{ + float f = -1; + + switch (factor) + { + case ZERO: + f = 0.0f; + break; + + case ONE: + f = 1.0f; + break; + + case SRC_ALPHA: + f = sa; + break; + + case DEST_ALPHA: + f = da; + break; + + case INV_SA: + f = 1 - sa; + break; + + case INV_DA: + f = 1 - da; + break; + + case SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 1.0f; + else + f = CLAMP (sa / da); + break; + + case DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 1.0f; + else + f = CLAMP (da / sa); + break; + + case INV_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 1.0f; + else + f = CLAMP ((1.0f - sa) / da); + break; + + case INV_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 1.0f; + else + f = CLAMP ((1.0f - da) / sa); + break; + + case ONE_MINUS_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 0.0f; + else + f = CLAMP (1.0f - sa / da); + break; + + case ONE_MINUS_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 0.0f; + else + f = CLAMP (1.0f - da / sa); + break; + + case ONE_MINUS_INV_DA_OVER_SA: + if (FLOAT_IS_ZERO (sa)) + f = 0.0f; + else + f = CLAMP (1.0f - (1.0f - da) / sa); + break; + + case ONE_MINUS_INV_SA_OVER_DA: + if (FLOAT_IS_ZERO (da)) + f = 0.0f; + else + f = CLAMP (1.0f - (1.0f - sa) / da); + break; + } + + return f; +} + +#define MAKE_PD_COMBINERS(name, a, b) \ + static float force_inline \ + pd_combine_ ## name (float sa, float s, float da, float d) \ + { \ + const float fa = get_factor (a, sa, da); \ + const float fb = get_factor (b, sa, da); \ + \ + return MIN (1.0f, s * fa + d * fb); \ + } \ + \ + MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name) + +MAKE_PD_COMBINERS (clear, ZERO, ZERO) +MAKE_PD_COMBINERS (src, ONE, ZERO) +MAKE_PD_COMBINERS (dst, ZERO, ONE) +MAKE_PD_COMBINERS (over, ONE, INV_SA) +MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE) +MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO) +MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA) +MAKE_PD_COMBINERS (out, INV_DA, ZERO) +MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA) +MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA) +MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA) +MAKE_PD_COMBINERS (xor, INV_DA, INV_SA) +MAKE_PD_COMBINERS (add, ONE, ONE) + +MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE) + +MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO) +MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO) +MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE) +MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE) +MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA) +MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA) + +MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO) +MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO) +MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE) +MAKE_PD_COMBINERS (conjoint_over, ONE, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE) +MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO) +MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_atop, 
DA_OVER_SA, ONE_MINUS_SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA) +MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) + +/* + * PDF blend modes: + * + * The following blend modes have been taken from the PDF ISO 32000 + * specification, which at this point in time is available from + * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf + * The relevant chapters are 11.3.5 and 11.3.6. + * The formula for computing the final pixel color given in 11.3.6 is: + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * with B() being the blend function. + * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs + * + * These blend modes should match the SVG filter draft specification, as + * it has been designed to mirror ISO 32000. Note that at the current point + * no released draft exists that shows this, as the formulas have not been + * updated yet after the release of ISO 32000. + * + * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and + * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an + * argument. Note that this implementation operates on premultiplied colors, + * while the PDF specification does not. Therefore the code uses the formula + * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + */ + +#define MAKE_SEPARABLE_PDF_COMBINERS(name) \ + static force_inline float \ + combine_ ## name ## _a (float sa, float s, float da, float d) \ + { \ + return da + sa - da * sa; \ + } \ + \ + static force_inline float \ + combine_ ## name ## _c (float sa, float s, float da, float d) \ + { \ + float f = (1 - sa) * d + (1 - da) * s; \ + \ + return f + blend_ ## name (sa, s, da, d); \ + } \ + \ + MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c) + +static force_inline float +blend_multiply (float sa, float s, float da, float d) +{ + return d * s; +} + +static force_inline float +blend_screen (float sa, float s, float da, float d) +{ + return d * sa + s * da - s * d; +} + +static force_inline float +blend_overlay (float sa, float s, float da, float d) +{ + if (2 * d < da) + return 2 * s * d; + else + return sa * da - 2 * (da - d) * (sa - s); +} + +static force_inline float +blend_darken (float sa, float s, float da, float d) +{ + s = s * da; + d = d * sa; + + if (s > d) + return d; + else + return s; +} + +static force_inline float +blend_lighten (float sa, float s, float da, float d) +{ + s = s * da; + d = d * sa; + + if (s > d) + return s; + else + return d; +} + +static force_inline float +blend_color_dodge (float sa, float s, float da, float d) +{ + if (FLOAT_IS_ZERO (d)) + return 0.0f; + else if (d * sa >= sa * da - s * da) + return sa * da; + else if (FLOAT_IS_ZERO (sa - s)) + return sa * da; + else + return sa * sa * d / (sa - s); +} + +static force_inline float +blend_color_burn (float sa, float s, float da, float d) +{ + if (d >= da) + return sa * da; + else if (sa * (da - d) >= s * da) + return 0.0f; + else if (FLOAT_IS_ZERO (s)) + return 0.0f; + else + return sa * (da - sa * (da - d) / s); +} + +static force_inline float +blend_hard_light (float sa, float s, float da, float d) +{ + if (2 * s < sa) + return 2 * s * d; + else + return sa * da - 2 * (da - d) * (sa - s); +} + +static force_inline float +blend_soft_light (float sa, float s, float da, float d) +{ + if (2 * s < sa) + { + if (FLOAT_IS_ZERO (da)) + return d * sa; + else + return d * sa - d * (da - d) * (sa - 2 * s) / da; + } + else + { + if 
(FLOAT_IS_ZERO (da)) + { + return 0.0f; + } + else + { + if (4 * d <= da) + return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3); + else + return d * sa + (sqrtf (d * da) - d) * (2 * s - sa); + } + } +} + +static force_inline float +blend_difference (float sa, float s, float da, float d) +{ + float dsa = d * sa; + float sda = s * da; + + if (sda < dsa) + return dsa - sda; + else + return sda - dsa; +} + +static force_inline float +blend_exclusion (float sa, float s, float da, float d) +{ + return s * da + d * sa - 2 * d * s; +} + +MAKE_SEPARABLE_PDF_COMBINERS (multiply) +MAKE_SEPARABLE_PDF_COMBINERS (screen) +MAKE_SEPARABLE_PDF_COMBINERS (overlay) +MAKE_SEPARABLE_PDF_COMBINERS (darken) +MAKE_SEPARABLE_PDF_COMBINERS (lighten) +MAKE_SEPARABLE_PDF_COMBINERS (color_dodge) +MAKE_SEPARABLE_PDF_COMBINERS (color_burn) +MAKE_SEPARABLE_PDF_COMBINERS (hard_light) +MAKE_SEPARABLE_PDF_COMBINERS (soft_light) +MAKE_SEPARABLE_PDF_COMBINERS (difference) +MAKE_SEPARABLE_PDF_COMBINERS (exclusion) + +/* + * PDF nonseparable blend modes. + * + * These are implemented using the following functions to operate in HSL + * space, with Cmax, Cmid, Cmin referring to the max, mid and min value + * of the red, green and blue components. + * + * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue + * + * clip_color (C): + * l = LUM (C) + * n = Cmin + * x = Cmax + * if n < 0.0 + * C = l + (((C – l) × l) ⁄ (l – n)) + * if x > 1.0 + * C = l + (((C – l) × (1 – l)) ⁄ (x – l)) + * return C + * + * set_lum (C, l): + * d = l – LUM (C) + * C += d + * return clip_color (C) + * + * SAT (C) = CH_MAX (C) - CH_MIN (C) + * + * set_sat (C, s): + * if Cmax > Cmin + * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) + * Cmax = s + * else + * Cmid = Cmax = 0.0 + * Cmin = 0.0 + * return C + */ + +/* For premultiplied colors, we need to know what happens when C is + * multiplied by a real number. LUM and SAT are linear: + * + * LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C) + * + * If we extend clip_color with an extra argument a and change + * + * if x >= 1.0 + * + * into + * + * if x >= a + * + * then clip_color is also linear: + * + * r * clip_color (C, a) = clip_color (r * C, r * a) + * + * for positive r. + * + * Similarly, we can extend set_lum with an extra argument that is just passed + * on to clip_color: + * + * r × set_lum ( C, l, a) + * + * = r × clip_color ( C + l - LUM (C), a) + * + * = clip_color ( r * C + r × l - LUM (r × C), r * a) + * + * = set_lum ( r * C, r * l, r * a) + * + * Finally, set_sat: + * + * r * set_sat (C, s) = set_sat (x * C, r * s) + * + * The above holds for all non-zero x because the x'es in the fraction for + * C_mid cancel out. Specifically, it holds for x = r: + * + * r * set_sat (C, s) = set_sat (r * C, r * s) + * + * So, for the non-separable PDF blend modes, we have (using s, d for + * non-premultiplied colors, and S, D for premultiplied): + * + * Color: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) + * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) + * + * Luminosity: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) + * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) + * + * Saturation: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) + * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), + * a_s * LUM (D), a_s * a_d) + * = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d) + * + * Hue: + * + * a_s * a_d * B(s, d) + * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) + * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) + */
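As a worked check of the Color case above, apply the linearity rule for set_lum with r = a_s · a_d (editorial note; notation as in the comment):

```latex
\begin{align*}
a_s a_d \, B(s, d)
  &= a_s a_d \,\operatorname{set\_lum}\bigl(S/a_s,\; \operatorname{LUM}(D)/a_d,\; 1\bigr) \\
  &= \operatorname{set\_lum}\bigl(a_s a_d \cdot S/a_s,\; a_s a_d \cdot \operatorname{LUM}(D)/a_d,\; a_s a_d \cdot 1\bigr) \\
  &= \operatorname{set\_lum}\bigl(a_d\, S,\; a_s \operatorname{LUM}(D),\; a_s a_d\bigr)
\end{align*}
```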
+ +typedef struct +{ + float r; + float g; + float b; +} rgb_t; + +static force_inline float +minf (float a, float b) +{ + return a < b? a : b; +} + +static force_inline float +maxf (float a, float b) +{ + return a > b? a : b; +} + +static force_inline float +channel_min (const rgb_t *c) +{ + return minf (minf (c->r, c->g), c->b); +} + +static force_inline float +channel_max (const rgb_t *c) +{ + return maxf (maxf (c->r, c->g), c->b); +} + +static force_inline float +get_lum (const rgb_t *c) +{ + return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f; +} + +static force_inline float +get_sat (const rgb_t *c) +{ + return channel_max (c) - channel_min (c); +} + +static void +clip_color (rgb_t *color, float a) +{ + float l = get_lum (color); + float n = channel_min (color); + float x = channel_max (color); + float t; + + if (n < 0.0f) + { + t = l - n; + if (FLOAT_IS_ZERO (t)) + { + color->r = 0.0f; + color->g = 0.0f; + color->b = 0.0f; + } + else + { + color->r = l + (((color->r - l) * l) / t); + color->g = l + (((color->g - l) * l) / t); + color->b = l + (((color->b - l) * l) / t); + } + } + if (x > a) + { + t = x - l; + if (FLOAT_IS_ZERO (t)) + { + color->r = a; + color->g = a; + color->b = a; + } + else + { + color->r = l + (((color->r - l) * (a - l) / t)); + color->g = l + (((color->g - l) * (a - l) / t)); + color->b = l + (((color->b - l) * (a - l) / t)); + } + } +} + +static void +set_lum (rgb_t *color, float sa, float l) +{ + float d = l - get_lum (color); + + color->r = color->r + d; + color->g = color->g + d; + color->b = color->b + d; + + clip_color (color, sa); +} + +static void +set_sat (rgb_t *src, float sat) +{ + float *max, *mid, *min; + float t; + + if (src->r > src->g) + { + if (src->r > src->b) + { + max = &(src->r); + + if (src->g > src->b) + { + mid = &(src->g); + min = &(src->b); + } + else + { + mid = &(src->b); + min = &(src->g); + } + } + else + { + max = &(src->b); + mid = &(src->r); + min = &(src->g); + } + } + else + { + if (src->r > src->b) + { + max = &(src->g); + mid = &(src->r); + min = &(src->b); + } + else + { + min = &(src->r); + + if (src->g > src->b) + { + max = &(src->g); + mid = &(src->b); + } + else + { + max = &(src->b); + mid = &(src->g); + } + } + } + + t = *max - *min; + + if (FLOAT_IS_ZERO (t)) + { + *mid = *max = 0.0f; + } + else + { + *mid = ((*mid - *min) * sat) / t; + *max = sat; + } + + *min = 0.0f; +}
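To see set_sat () in action on concrete values (editorial sketch; the standalone main () below is not part of the patch):

```c
#include <stdio.h>

/* Walk through set_sat () above for C = (0.9, 0.5, 0.1) and sat = 0.4:
 * max = r, mid = g, min = b, so t = 0.9 - 0.1 = 0.8. */
int
main (void)
{
    float r = 0.9f, g = 0.5f, b = 0.1f;
    float sat = 0.4f;
    float t = r - b;

    g = (g - b) * sat / t;   /* mid: (0.5 - 0.1) * 0.4 / 0.8 = 0.2 */
    r = sat;                 /* max becomes the requested saturation */
    b = 0.0f;                /* min is forced to zero */

    /* Prints 0.40 0.20 0.00; the result's SAT is exactly 0.4. */
    printf ("%.2f %.2f %.2f\n", r, g, b);
    return 0;
}
```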
+ +/* + * Hue: + * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) + */ +static force_inline void +blend_hsl_hue (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = src->r * da; + res->g = src->g * da; + res->b = src->b * da; + + set_sat (res, get_sat (dest) * sa); + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Saturation: + * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) + */ +static force_inline void +blend_hsl_saturation (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = dest->r * sa; + res->g = dest->g * sa; + res->b = dest->b * sa; + + set_sat (res, get_sat (src) * da); + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Color: + * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) + */ +static force_inline void +blend_hsl_color (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = src->r * da; + res->g = src->g * da; + res->b = src->b * da; + + set_lum (res, sa * da, get_lum (dest) * sa); +} + +/* + * Luminosity: + * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) + */ +static force_inline void +blend_hsl_luminosity (rgb_t *res, + const rgb_t *dest, float da, + const rgb_t *src, float sa) +{ + res->r = dest->r * sa; + res->g = dest->g * sa; + res->b = dest->b * sa; + + set_lum (res, sa * da, get_lum (src) * da); +} + +#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name) \ + static void \ + combine_ ## name ## _u_float (pixman_implementation_t *imp, \ + pixman_op_t op, \ + float *dest, \ + const float *src, \ + const float *mask, \ + int n_pixels) \ + { \ + int i; \ + \ + for (i = 0; i < 4 * n_pixels; i += 4) \ + { \ + float sa, da; \ + rgb_t sc, dc, rc; \ + \ + sa = src[i + 0]; \ + sc.r = src[i + 1]; \ + sc.g = src[i + 2]; \ + sc.b = src[i + 3]; \ + \ + da = dest[i + 0]; \ + dc.r = dest[i + 1]; \ + dc.g = dest[i + 2]; \ + dc.b = dest[i + 3]; \ + \ + if (mask) \ + { \ + float ma = mask[i + 0]; \ + \ + /* Component alpha is not supported for HSL modes */ \ + sa *= ma; \ + sc.r *= ma; \ + sc.g *= ma; \ + sc.b *= ma; \ + } \ + \ + blend_ ## name (&rc, &dc, da, &sc, sa); \ + \ + dest[i + 0] = sa + da - sa * da; \ + dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r; \ + dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g; \ + dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b; \ + } \ + } + +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color) +MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity) + +void +_pixman_setup_combiner_functions_float (pixman_implementation_t *imp) +{ + /* Unified alpha */ + imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float; + imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float; + imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float; + imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float; + imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float; + imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float; + imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float; + imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float; + imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float; + imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float; + + /* Disjoint, unified */ + imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_SRC] =
combine_disjoint_src_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float; + + /* Conjoint, unified */ + imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float; + imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float; + + /* PDF operators, unified */ + imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float; + imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float; + imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float; + imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float; + imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float; + imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float; + imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float; + imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float; + imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float; + imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float; + imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float; + + imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float; + imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float; + imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float; + imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float; + + /* Component alpha combiners */ + imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_OUT] = 
combine_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float; + imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float; + imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float; + + /* Disjoint CA */ + imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float; + + /* Conjoint CA */ + imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float; + + /* PDF operators CA */ + imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float; + imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float; + imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float; + imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float; + imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float; + imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float; + imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float; + imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float; + imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float; + imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float; + imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float; + + /* It is not clear that these make sense, so make them noops for now */ + imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = 
combine_dst_u_float;
+ imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float;
+ imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float;
+ imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float;
+}
diff --git a/programs/develop/libraries/pixman/pixman-combine32.c b/programs/develop/libraries/pixman/pixman-combine32.c
index d98d3f8383..3ac7576bdc 100644
--- a/programs/develop/libraries/pixman/pixman-combine32.c
+++ b/programs/develop/libraries/pixman/pixman-combine32.c
@@ -1,7 +1,26 @@
-/* WARNING: This file is generated by combine.pl from combine.inc.
- Please edit one of those files rather than this one. */
-
-#line 1 "pixman-combine.c.template"
+/*
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ * 2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -10,10 +29,9 @@
#include <string.h>
#include "pixman-private.h"
-
#include "pixman-combine32.h"
-/*** per channel helper functions ***/
+/* component alpha helper functions */
static void
combine_mask_ca (uint32_t *src, uint32_t *mask)
@@ -95,15 +113,11 @@ combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
/*
* There are two ways of handling alpha -- either as a single unified value or
* a separate value for each component, hence each macro must have two
- * versions. The unified alpha version has a 'U' at the end of the name,
- * the component version has a 'C'. Similarly, functions which deal with
+ * versions. The unified alpha version has a 'u' at the end of the name,
+ * the component version has a 'ca'. Similarly, functions which deal with
* this difference will have two versions using the same convention.
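* For example, combine_over_u below reads a single alpha value out of
* the mask for each pixel, while combine_over_ca applies the mask's
* red, green, blue and alpha channels to the source separately.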
*/ -/* - * All of the composing functions - */ - static force_inline uint32_t combine_mask (const uint32_t *src, const uint32_t *mask, int i) { @@ -158,7 +172,9 @@ combine_src_u (pixman_implementation_t *imp, int i; if (!mask) + { memcpy (dest, src, width * sizeof (uint32_t)); + } else { for (i = 0; i < width; ++i) @@ -170,7 +186,6 @@ combine_src_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, call combine_src_u */ static void combine_over_u (pixman_implementation_t *imp, pixman_op_t op, @@ -181,18 +196,61 @@ combine_over_u (pixman_implementation_t *imp, { int i; - for (i = 0; i < width; ++i) + if (!mask) { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *(dest + i) = d; + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = *(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + } + else + { + for (i = 0; i < width; ++i) + { + uint32_t m = ALPHA_8 (*(mask + i)); + if (m == 0xFF) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = *(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + else if (m) + { + uint32_t s = *(src + i); + if (s) + { + uint32_t d = *(dest + i); + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s); + *(dest + i) = d; + } + } + } } } -/* if the Dst is opaque, this is a noop */ static void combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op, @@ -213,7 +271,6 @@ combine_over_reverse_u (pixman_implementation_t *imp, } } -/* if the Dst is opaque, call combine_src_u */ static void combine_in_u (pixman_implementation_t *imp, pixman_op_t op, @@ -233,7 +290,6 @@ combine_in_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, this is a noop */ static void combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op, @@ -254,7 +310,6 @@ combine_in_reverse_u (pixman_implementation_t *imp, } } -/* if the Dst is opaque, call combine_clear */ static void combine_out_u (pixman_implementation_t *imp, pixman_op_t op, @@ -274,7 +329,6 @@ combine_out_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, call combine_clear */ static void combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op, @@ -295,9 +349,6 @@ combine_out_reverse_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, call combine_in_u */ -/* if the Dst is opaque, call combine_over_u */ -/* if both the Src and Dst are opaque, call combine_src_u */ static void combine_atop_u (pixman_implementation_t *imp, pixman_op_t op, @@ -320,9 +371,6 @@ combine_atop_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, call combine_over_reverse_u */ -/* if the Dst is opaque, call combine_in_reverse_u */ -/* if both the Src and Dst are opaque, call combine_dst_u */ static void combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op, @@ -345,9 +393,6 @@ combine_atop_reverse_u (pixman_implementation_t *imp, } } -/* if the Src is opaque, call combine_over_u */ -/* if the Dst is opaque, call combine_over_reverse_u */ -/* if both the Src and Dst are opaque, call combine_clear */ static void combine_xor_u (pixman_implementation_t *imp, pixman_op_t op, @@ -389,9 +434,6 @@ combine_add_u (pixman_implementation_t *imp, } } -/* if the Src is 
opaque, call combine_add_u */ -/* if the Dst is opaque, call combine_add_u */ -/* if both the Src and Dst are opaque, call combine_add_u */ static void combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op, @@ -441,14 +483,13 @@ combine_saturate_u (pixman_implementation_t *imp, * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an * argument. Note that this implementation operates on premultiplied colors, * while the PDF specification does not. Therefore the code uses the formula - * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) */ /* * Multiply * B(Dca, ad, Sca, as) = Dca.Sca */ - static void combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op, @@ -493,7 +534,7 @@ combine_multiply_ca (pixman_implementation_t *imp, uint32_t r = d; uint32_t dest_ia = ALPHA_8 (~d); - combine_mask_value_ca (&s, &m); + combine_mask_ca (&s, &m); UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia); UN8x4_MUL_UN8x4 (d, s); @@ -526,7 +567,7 @@ combine_multiply_ca (pixman_implementation_t *imp, UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ \ *(dest + i) = result + \ - (DIV_ONE_UN8 (sa * da) << A_SHIFT) + \ + (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ (blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \ (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \ (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \ @@ -550,13 +591,13 @@ combine_multiply_ca (pixman_implementation_t *imp, uint8_t ida = ~da; \ uint32_t result; \ \ - combine_mask_value_ca (&s, &m); \ + combine_mask_ca (&s, &m); \ \ result = d; \ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \ \ result += \ - (DIV_ONE_UN8 (ALPHA_8 (m) * da) << A_SHIFT) + \ + (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \ (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \ (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \ (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \ @@ -853,7 +894,7 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) * * r * set_sat (C, s) = set_sat (x * C, r * s) * - * The above holds for all non-zero x, because they x'es in the fraction for + * The above holds for all non-zero x, because the x'es in the fraction for * C_mid cancel out. 
Specifically, it holds for x = r: * * r * set_sat (C, s) = set_sat (r_c, rs) @@ -889,8 +930,7 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) * * a_s * a_d * B(s, d) * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)), - * a_s * LUM (D), a_s * a_d) + * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) * */ @@ -931,7 +971,7 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) blend_ ## name (c, dc, da, sc, sa); \ \ *(dest + i) = result + \ - (DIV_ONE_UN8 (sa * da) << A_SHIFT) + \ + (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \ (DIV_ONE_UN8 (c[0]) << R_SHIFT) + \ (DIV_ONE_UN8 (c[1]) << G_SHIFT) + \ (DIV_ONE_UN8 (c[2])); \ @@ -1148,9 +1188,7 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) #undef CH_MIN #undef PDF_NON_SEPARABLE_BLEND_MODE -/* Overlay - * - * All of the disjoint composing functions +/* All of the disjoint/conjoint composing functions * * The four entries in the first column indicate what source contributions * come from each of the four areas of the picture -- areas covered by neither @@ -1171,6 +1209,9 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) + * + * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more + * information about these operators. */ #define COMBINE_A_OUT 1 @@ -1583,9 +1624,8 @@ combine_conjoint_xor_u (pixman_implementation_t *imp, combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); } -/************************************************************************/ -/*********************** Per Channel functions **************************/ -/************************************************************************/ + +/* Component alpha combiners */ static void combine_clear_ca (pixman_implementation_t *imp, @@ -2462,4 +2502,3 @@ _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp) imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; } - diff --git a/programs/develop/libraries/pixman/pixman-combine32.h b/programs/develop/libraries/pixman/pixman-combine32.h index 68bde4281a..cdd56a61a1 100644 --- a/programs/develop/libraries/pixman/pixman-combine32.h +++ b/programs/develop/libraries/pixman/pixman-combine32.h @@ -1,8 +1,3 @@ -/* WARNING: This file is generated by combine.pl from combine.inc. - Please edit one of those files rather than this one. */ - -#line 1 "pixman-combine.c.template" - #define COMPONENT_SIZE 8 #define MASK 0xff #define ONE_HALF 0x80 @@ -24,19 +19,62 @@ #define GREEN_8(x) (((x) >> G_SHIFT) & MASK) #define BLUE_8(x) ((x) & MASK) +/* + * ARMv6 has UQADD8 instruction, which implements unsigned saturated + * addition for 8-bit values packed in 32-bit registers. It is very useful + * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would + * otherwise need a lot of arithmetic operations to simulate this operation). + * Since most of the major ARM linux distros are built for ARMv7, we are + * much less dependent on runtime CPU detection and can get practical + * benefits from conditional compilation here for a lot of users. 
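+ *
+ * For example, given x = 0x80FF40C0 and y = 0x90013020, uqadd8 adds
+ * the four byte lanes independently and clamps each sum at 0xFF,
+ * producing 0xFFFF70E0; the 0xFF + 0x01 lane saturates to 0xFF
+ * instead of wrapping around to 0x00.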
+ */ + +#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \ + !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__)) +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + +static force_inline uint32_t +un8x4_add_un8x4 (uint32_t x, uint32_t y) +{ + uint32_t t; + asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y)); + return t; +} + +#define UN8x4_ADD_UN8x4(x, y) \ + ((x) = un8x4_add_un8x4 ((x), (y))) + +#define UN8_rb_ADD_UN8_rb(x, y, t) \ + ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t)) + +#define ADD_UN8(x, y, t) \ + ((t) = (x), un8x4_add_un8x4 ((t), (y))) + +#endif +#endif + +/*****************************************************************************/ + /* * Helper macros. */ #define MUL_UN8(a, b, t) \ - ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) + ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) #define DIV_UN8(a, b) \ - (((uint16_t) (a) * MASK) / (b)) + (((uint16_t) (a) * MASK + ((b) / 2)) / (b)) +#ifndef ADD_UN8 #define ADD_UN8(x, y, t) \ ((t) = (x) + (y), \ (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) +#endif #define DIV_ONE_UN8(x) \ (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) @@ -61,6 +99,7 @@ /* * x_rb = min (x_rb + y_rb, 255) */ +#ifndef UN8_rb_ADD_UN8_rb #define UN8_rb_ADD_UN8_rb(x, y, t) \ do \ { \ @@ -68,6 +107,7 @@ t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ x = (t & RB_MASK); \ } while (0) +#endif /* * x_rb = (x_rb * a_rb) / 255 @@ -213,6 +253,7 @@ /* x_c = min(x_c + y_c, 255) */ +#ifndef UN8x4_ADD_UN8x4 #define UN8x4_ADD_UN8x4(x, y) \ do \ { \ @@ -228,3 +269,4 @@ \ x = r1__ | (r2__ << G_SHIFT); \ } while (0) +#endif diff --git a/programs/develop/libraries/pixman/pixman-combine64.c b/programs/develop/libraries/pixman/pixman-combine64.c deleted file mode 100644 index 850afba6ad..0000000000 --- a/programs/develop/libraries/pixman/pixman-combine64.c +++ /dev/null @@ -1,2465 +0,0 @@ -/* WARNING: This file is generated by combine.pl from combine.inc. - Please edit one of those files rather than this one. 
*/ - -#line 1 "pixman-combine.c.template" -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include - -#include "pixman-private.h" - -#include "pixman-combine64.h" - -/*** per channel helper functions ***/ - -static void -combine_mask_ca (uint64_t *src, uint64_t *mask) -{ - uint64_t a = *mask; - - uint64_t x; - uint32_t xa; - - if (!a) - { - *(src) = 0; - return; - } - - x = *(src); - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - xa = x >> A_SHIFT; - UN16x4_MUL_UN16x4 (x, a); - *(src) = x; - - UN16x4_MUL_UN16 (a, xa); - *(mask) = a; -} - -static void -combine_mask_value_ca (uint64_t *src, const uint64_t *mask) -{ - uint64_t a = *mask; - uint64_t x; - - if (!a) - { - *(src) = 0; - return; - } - - if (a == ~0) - return; - - x = *(src); - UN16x4_MUL_UN16x4 (x, a); - *(src) = x; -} - -static void -combine_mask_alpha_ca (const uint64_t *src, uint64_t *mask) -{ - uint64_t a = *(mask); - uint64_t x; - - if (!a) - return; - - x = *(src) >> A_SHIFT; - if (x == MASK) - return; - - if (a == ~0) - { - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - UN16x4_MUL_UN16 (a, x); - *(mask) = a; -} - -/* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. The unified alpha version has a 'U' at the end of the name, - * the component version has a 'C'. Similarly, functions which deal with - * this difference will have two versions using the same convention. - */ - -/* - * All of the composing functions - */ - -static force_inline uint64_t -combine_mask (const uint64_t *src, const uint64_t *mask, int i) -{ - uint64_t s, m; - - if (mask) - { - m = *(mask + i) >> A_SHIFT; - - if (!m) - return 0; - } - - s = *(src + i); - - if (mask) - UN16x4_MUL_UN16 (s, m); - - return s; -} - -static void -combine_clear (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(uint64_t)); -} - -static void -combine_dst (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - return; -} - -static void -combine_src_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (uint64_t)); - else - { - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - - *(dest + i) = s; - } - } -} - -/* if the Src is opaque, call combine_src_u */ -static void -combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t ia = ALPHA_16 (~s); - - UN16x4_MUL_UN16_ADD_UN16x4 (d, ia, s); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, this is a noop */ -static void -combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t ia = ALPHA_16 (~*(dest + i)); - UN16x4_MUL_UN16_ADD_UN16x4 (s, ia, d); - *(dest + i) = s; - } -} - -/* if the Dst is opaque, call combine_src_u */ 
-static void -combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t a = ALPHA_16 (*(dest + i)); - UN16x4_MUL_UN16 (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, this is a noop */ -static void -combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t a = ALPHA_16 (s); - UN16x4_MUL_UN16 (d, a); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, call combine_clear */ -static void -combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t a = ALPHA_16 (~*(dest + i)); - UN16x4_MUL_UN16 (s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_clear */ -static void -combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t a = ALPHA_16 (~s); - UN16x4_MUL_UN16 (d, a); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_in_u */ -/* if the Dst is opaque, call combine_over_u */ -/* if both the Src and Dst are opaque, call combine_src_u */ -static void -combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t dest_a = ALPHA_16 (d); - uint64_t src_ia = ALPHA_16 (~s); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_a, d, src_ia); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_reverse_u */ -/* if the Dst is opaque, call combine_in_reverse_u */ -/* if both the Src and Dst are opaque, call combine_dst_u */ -static void -combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t src_a = ALPHA_16 (s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call combine_over_u */ -/* if the Dst is opaque, call combine_over_reverse_u */ -/* if both the Src and Dst are opaque, call combine_clear */ -static void -combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t src_ia = ALPHA_16 (~s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_ia); - *(dest + i) = s; - } -} - -static void -combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ 
- int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - UN16x4_ADD_UN16x4 (d, s); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call combine_add_u */ -/* if the Dst is opaque, call combine_add_u */ -/* if both the Src and Dst are opaque, call combine_add_u */ -static void -combine_saturate_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint32_t sa, da; - - sa = s >> A_SHIFT; - da = ~d >> A_SHIFT; - if (sa > da) - { - sa = DIV_UN16 (da, sa); - UN16x4_MUL_UN16 (s, sa); - } - ; - UN16x4_ADD_UN16x4 (d, s); - *(dest + i) = d; - } -} - -/* - * PDF blend modes: - * The following blend modes have been taken from the PDF ISO 32000 - * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. - * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * ar.Cra = (1 – as) . Dca + (1 – ad) . 
Sca + B(Dca, ad, Sca, as) - */ - -/* - * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca - */ - -static void -combine_multiply_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t ss = s; - uint64_t src_ia = ALPHA_16 (~s); - uint64_t dest_ia = ALPHA_16 (~d); - - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (ss, dest_ia, d, src_ia); - UN16x4_MUL_UN16x4 (d, s); - UN16x4_ADD_UN16x4 (d, ss); - - *(dest + i) = d; - } -} - -static void -combine_multiply_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t m = *(mask + i); - uint64_t s = *(src + i); - uint64_t d = *(dest + i); - uint64_t r = d; - uint64_t dest_ia = ALPHA_16 (~d); - - combine_mask_value_ca (&s, &m); - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (r, ~m, s, dest_ia); - UN16x4_MUL_UN16x4 (d, s); - UN16x4_ADD_UN16x4 (r, d); - - *(dest + i) = r; - } -} - -#define PDF_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t * dest, \ - const uint64_t * src, \ - const uint64_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - uint64_t s = combine_mask (src, mask, i); \ - uint64_t d = *(dest + i); \ - uint16_t sa = ALPHA_16 (s); \ - uint16_t isa = ~sa; \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - \ - result = d; \ - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UN16 (sa * da) << A_SHIFT) + \ - (blend_ ## name (RED_16 (d), da, RED_16 (s), sa) << R_SHIFT) + \ - (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), sa) << G_SHIFT) + \ - (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), sa)); \ - } \ - } \ - \ - static void \ - combine_ ## name ## _ca (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t * dest, \ - const uint64_t * src, \ - const uint64_t * mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) { \ - uint64_t m = *(mask + i); \ - uint64_t s = *(src + i); \ - uint64_t d = *(dest + i); \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - \ - combine_mask_value_ca (&s, &m); \ - \ - result = d; \ - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (result, ~m, s, ida); \ - \ - result += \ - (DIV_ONE_UN16 (ALPHA_16 (m) * da) << A_SHIFT) + \ - (blend_ ## name (RED_16 (d), da, RED_16 (s), RED_16 (m)) << R_SHIFT) + \ - (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), GREEN_16 (m)) << G_SHIFT) + \ - (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), BLUE_16 (m))); \ - \ - *(dest + i) = result; \ - } \ - } - -/* - * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca - */ -static inline uint64_t -blend_screen (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - return DIV_ONE_UN16 (sca * da + dca * sa - sca * dca); -} - -PDF_SEPARABLE_BLEND_MODE (screen) - -/* - * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline uint64_t -blend_overlay (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t rca; - - if (2 * dca < da) - rca = 2 * sca * dca; - else - rca = sa * da - 2 * (da - dca) * (sa - sca); - return DIV_ONE_UN16 (rca); -} - -PDF_SEPARABLE_BLEND_MODE (overlay) - -/* - * 
Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) - */ -static inline uint64_t -blend_darken (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UN16 (s > d ? d : s); -} - -PDF_SEPARABLE_BLEND_MODE (darken) - -/* - * Lighten - * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) - */ -static inline uint64_t -blend_lighten (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t s, d; - - s = sca * da; - d = dca * sa; - return DIV_ONE_UN16 (s > d ? s : d); -} - -PDF_SEPARABLE_BLEND_MODE (lighten) - -/* - * Color dodge - * B(Dca, Da, Sca, Sa) = - * if Dca == 0 - * 0 - * if Sca == Sa - * Sa.Da - * otherwise - * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) - */ -static inline uint64_t -blend_color_dodge (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (sca >= sa) - { - return dca == 0 ? 0 : DIV_ONE_UN16 (sa * da); - } - else - { - uint64_t rca = dca * sa / (sa - sca); - return DIV_ONE_UN16 (sa * MIN (rca, da)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_dodge) - -/* - * Color burn - * B(Dca, Da, Sca, Sa) = - * if Dca == Da - * Sa.Da - * if Sca == 0 - * 0 - * otherwise - * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) - */ -static inline uint64_t -blend_color_burn (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (sca == 0) - { - return dca < da ? 0 : DIV_ONE_UN16 (sa * da); - } - else - { - uint64_t rca = (da - dca) * sa / sca; - return DIV_ONE_UN16 (sa * (MAX (rca, da) - rca)); - } -} - -PDF_SEPARABLE_BLEND_MODE (color_burn) - -/* - * Hard light - * B(Dca, Da, Sca, Sa) = - * if 2.Sca < Sa - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) - */ -static inline uint64_t -blend_hard_light (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - if (2 * sca < sa) - return DIV_ONE_UN16 (2 * sca * dca); - else - return DIV_ONE_UN16 (sa * da - 2 * (da - dca) * (sa - sca)); -} - -PDF_SEPARABLE_BLEND_MODE (hard_light) - -/* - * Soft light - * B(Dca, Da, Sca, Sa) = - * if (2.Sca <= Sa) - * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa)) - * otherwise if Dca.4 <= Da - * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3) - * otherwise - * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa)) - */ -static inline uint64_t -blend_soft_light (uint64_t dca_org, - uint64_t da_org, - uint64_t sca_org, - uint64_t sa_org) -{ - double dca = dca_org * (1.0 / MASK); - double da = da_org * (1.0 / MASK); - double sca = sca_org * (1.0 / MASK); - double sa = sa_org * (1.0 / MASK); - double rca; - - if (2 * sca < sa) - { - if (da == 0) - rca = dca * sa; - else - rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da; - } - else if (da == 0) - { - rca = 0; - } - else if (4 * dca <= da) - { - rca = dca * sa + - (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3); - } - else - { - rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa); - } - return rca * MASK + 0.5; -} - -PDF_SEPARABLE_BLEND_MODE (soft_light) - -/* - * Difference - * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da) - */ -static inline uint64_t -blend_difference (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - uint64_t dcasa = dca * sa; - uint64_t scada = sca * da; - - if (scada < dcasa) - return DIV_ONE_UN16 (dcasa - scada); - else - return DIV_ONE_UN16 (scada - dcasa); -} - -PDF_SEPARABLE_BLEND_MODE (difference) - -/* - * Exclusion - * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca) - */ - -/* This can be made faster by writing it directly and not using - * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ - -static 
inline uint64_t -blend_exclusion (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa) -{ - return DIV_ONE_UN16 (sca * da + dca * sa - 2 * dca * sca); -} - -PDF_SEPARABLE_BLEND_MODE (exclusion) - -#undef PDF_SEPARABLE_BLEND_MODE - -/* - * PDF nonseperable blend modes are implemented using the following functions - * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid - * and min value of the red, green and blue components. - * - * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue - * - * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) ) - * if x > 1.0 - * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) ) - * return C - * - * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) - * - * SAT (C) = CH_MAX (C) - CH_MIN (C) - * - * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C - */ - -/* For premultiplied colors, we need to know what happens when C is - * multiplied by a real number. LUM and SAT are linear: - * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) - * - * If we extend clip_color with an extra argument a and change - * - * if x >= 1.0 - * - * into - * - * if x >= a - * - * then clip_color is also linear: - * - * r * clip_color (C, a) = clip_color (r_c, ra); - * - * for positive r. - * - * Similarly, we can extend set_lum with an extra argument that is just passed - * on to clip_color: - * - * r * set_lum ( C, l, a) - * - * = r × clip_color ( C + l - LUM (C), a) - * - * = clip_color ( r * C + r × l - r * LUM (C), r * a) - * - * = set_lum ( r * C, r * l, r * a) - * - * Finally, set_sat: - * - * r * set_sat (C, s) = set_sat (x * C, r * s) - * - * The above holds for all non-zero x, because they x'es in the fraction for - * C_mid cancel out. Specifically, it holds for x = r: - * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - */ - -/* So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)), - * a_s * LUM (D), a_s * a_d) - * - */ - -#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) -#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? 
c[1] : c[2])) -#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) -#define SAT(c) (CH_MAX (c) - CH_MIN (c)) - -#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ - static void \ - combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint64_t *dest, \ - const uint64_t *src, \ - const uint64_t *mask, \ - int width) \ - { \ - int i; \ - for (i = 0; i < width; ++i) \ - { \ - uint64_t s = combine_mask (src, mask, i); \ - uint64_t d = *(dest + i); \ - uint16_t sa = ALPHA_16 (s); \ - uint16_t isa = ~sa; \ - uint16_t da = ALPHA_16 (d); \ - uint16_t ida = ~da; \ - uint64_t result; \ - uint64_t sc[3], dc[3], c[3]; \ - \ - result = d; \ - UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida); \ - dc[0] = RED_16 (d); \ - sc[0] = RED_16 (s); \ - dc[1] = GREEN_16 (d); \ - sc[1] = GREEN_16 (s); \ - dc[2] = BLUE_16 (d); \ - sc[2] = BLUE_16 (s); \ - blend_ ## name (c, dc, da, sc, sa); \ - \ - *(dest + i) = result + \ - (DIV_ONE_UN16 (sa * da) << A_SHIFT) + \ - (DIV_ONE_UN16 (c[0]) << R_SHIFT) + \ - (DIV_ONE_UN16 (c[1]) << G_SHIFT) + \ - (DIV_ONE_UN16 (c[2])); \ - } \ - } - -static void -set_lum (uint64_t dest[3], uint64_t src[3], uint64_t sa, uint64_t lum) -{ - double a, l, min, max; - double tmp[3]; - - a = sa * (1.0 / MASK); - - l = lum * (1.0 / MASK); - tmp[0] = src[0] * (1.0 / MASK); - tmp[1] = src[1] * (1.0 / MASK); - tmp[2] = src[2] * (1.0 / MASK); - - l = l - LUM (tmp); - tmp[0] += l; - tmp[1] += l; - tmp[2] += l; - - /* clip_color */ - l = LUM (tmp); - min = CH_MIN (tmp); - max = CH_MAX (tmp); - - if (min < 0) - { - if (l - min == 0.0) - { - tmp[0] = 0; - tmp[1] = 0; - tmp[2] = 0; - } - else - { - tmp[0] = l + (tmp[0] - l) * l / (l - min); - tmp[1] = l + (tmp[1] - l) * l / (l - min); - tmp[2] = l + (tmp[2] - l) * l / (l - min); - } - } - if (max > a) - { - if (max - l == 0.0) - { - tmp[0] = a; - tmp[1] = a; - tmp[2] = a; - } - else - { - tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); - tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l); - tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l); - } - } - - dest[0] = tmp[0] * MASK + 0.5; - dest[1] = tmp[1] * MASK + 0.5; - dest[2] = tmp[2] * MASK + 0.5; -} - -static void -set_sat (uint64_t dest[3], uint64_t src[3], uint64_t sat) -{ - int id[3]; - uint64_t min, max; - - if (src[0] > src[1]) - { - if (src[0] > src[2]) - { - id[0] = 0; - if (src[1] > src[2]) - { - id[1] = 1; - id[2] = 2; - } - else - { - id[1] = 2; - id[2] = 1; - } - } - else - { - id[0] = 2; - id[1] = 0; - id[2] = 1; - } - } - else - { - if (src[0] > src[2]) - { - id[0] = 1; - id[1] = 0; - id[2] = 2; - } - else - { - id[2] = 0; - if (src[1] > src[2]) - { - id[0] = 1; - id[1] = 2; - } - else - { - id[0] = 2; - id[1] = 1; - } - } - } - - max = dest[id[0]]; - min = dest[id[2]]; - if (max > min) - { - dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); - dest[id[0]] = sat; - dest[id[2]] = 0; - } - else - { - dest[0] = dest[1] = dest[2] = 0; - } -} - -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) - */ -static inline void -blend_hsl_hue (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_sat (c, c, SAT (dc) * sa); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) - -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) - */ -static inline void -blend_hsl_saturation (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = dc[0] * sa; - c[1] = 
dc[1] * sa; - c[2] = dc[2] * sa; - set_sat (c, c, SAT (sc) * da); - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) - -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) - */ -static inline void -blend_hsl_color (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_lum (c, c, sa * da, LUM (dc) * sa); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) - -/* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) - */ -static inline void -blend_hsl_luminosity (uint64_t c[3], - uint64_t dc[3], - uint64_t da, - uint64_t sc[3], - uint64_t sa) -{ - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_lum (c, c, sa * da, LUM (sc) * da); -} - -PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) - -#undef SAT -#undef LUM -#undef CH_MAX -#undef CH_MIN -#undef PDF_NON_SEPARABLE_BLEND_MODE - -/* Overlay - * - * All of the disjoint composing functions - * - * The four entries in the first column indicate what source contributions - * come from each of the four areas of the picture -- areas covered by neither - * A nor B, areas covered only by A, areas covered only by B and finally - * areas covered by both A and B. - * - * Disjoint Conjoint - * Fa Fb Fa Fb - * (0,0,0,0) 0 0 0 0 - * (0,A,0,A) 1 0 1 0 - * (0,0,B,B) 0 1 0 1 - * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) - * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 - * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 - * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) - * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 - * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) - * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) - * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) - * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) - */ - -#define COMBINE_A_OUT 1 -#define COMBINE_A_IN 2 -#define COMBINE_B_OUT 4 -#define COMBINE_B_IN 8 - -#define COMBINE_CLEAR 0 -#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) -#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) -#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) -#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) -#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) - -/* portion covered by a but not b */ -static uint16_t -combine_disjoint_out_part (uint16_t a, uint16_t b) -{ - /* min (1, (1-b) / a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN16 (b, a); /* (1-b) / a */ -} - -/* portion covered by both a and b */ -static uint16_t -combine_disjoint_in_part (uint16_t a, uint16_t b) -{ - /* max (1-(1-b)/a,0) */ - /* = - min ((1-b)/a - 1, 0) */ - /* = 1 - min (1, (1-b)/a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return 0; /* 1 - 1 */ - return ~DIV_UN16(b, a); /* 1 - (1-b) / a */ -} - -/* portion covered by a but not b */ -static uint16_t -combine_conjoint_out_part (uint16_t a, uint16_t b) -{ - /* max (1-b/a,0) */ - /* = 1-min(b/a,1) */ - - /* min (1, (1-b) / a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return 0x00; /* 0 */ - return ~DIV_UN16(b, a); /* 1 - b/a */ -} - -/* portion covered by both a and b */ -static uint16_t -combine_conjoint_in_part (uint16_t a, uint16_t b) -{ - /* min (1,b/a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return MASK; /* 1 */ - return DIV_UN16 (b, a); /* b/a */ -} - 
-#define GET_COMP(v, i) ((uint32_t) (uint16_t) ((v) >> i)) - -#define ADD(x, y, i, t) \ - ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ - (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) - -#define GENERIC(x, y, i, ax, ay, t, u, v) \ - ((t) = (MUL_UN16 (GET_COMP (y, i), ay, (u)) + \ - MUL_UN16 (GET_COMP (x, i), ax, (v))), \ - (uint64_t) ((uint16_t) ((t) | \ - (0 - ((t) >> G_SHIFT)))) << (i)) - -static void -combine_disjoint_general_u (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t m, n, o, p; - uint32_t Fa, Fb, t, u, v; - uint16_t sa = s >> A_SHIFT; - uint16_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_disjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_disjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_disjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_disjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - s = m | n | o | p; - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint32_t a = s >> A_SHIFT; - - if (s != 0x00) - { - uint64_t d = *(dest + i); - a = combine_disjoint_out_part (d >> A_SHIFT, a); - UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s); - - *(dest + i) = d; - } - } -} - -static void -combine_disjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_u 
(pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_u (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = combine_mask (src, mask, i); - uint64_t d = *(dest + i); - uint64_t m, n, o, p; - uint32_t Fa, Fb, t, u, v; - uint16_t sa = s >> A_SHIFT; - uint16_t da = d >> A_SHIFT; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - Fa = combine_conjoint_out_part (sa, da); - break; - - case COMBINE_A_IN: - Fa = combine_conjoint_in_part (sa, da); - break; - - case COMBINE_A: - Fa = MASK; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - Fb = combine_conjoint_out_part (da, sa); - break; - - case COMBINE_B_IN: - Fb = combine_conjoint_in_part (da, sa); - break; - - case COMBINE_B: - Fb = MASK; - break; - } - - m = GENERIC (s, d, 0, Fa, Fb, t, u, v); - n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); - o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); - p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_u (dest, src, 
mask, width, COMBINE_XOR); -} - -/************************************************************************/ -/*********************** Per Channel functions **************************/ -/************************************************************************/ - -static void -combine_clear_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - memset (dest, 0, width * sizeof(uint64_t)); -} - -static void -combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - - combine_mask_value_ca (&s, &m); - - *(dest + i) = s; - } -} - -static void -combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_ca (&s, &m); - - a = ~m; - if (a) - { - uint64_t d = *(dest + i); - UN16x4_MUL_UN16x4_ADD_UN16x4 (d, a, s); - s = d; - } - - *(dest + i) = s; - } -} - -static void -combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t a = ~d >> A_SHIFT; - - if (a) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - - UN16x4_MUL_UN16x4 (s, m); - UN16x4_MUL_UN16_ADD_UN16x4 (s, a, d); - - *(dest + i) = s; - } - } -} - -static void -combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint32_t a = d >> A_SHIFT; - uint64_t s = 0; - - if (a) - { - uint64_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN16x4_MUL_UN16 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_alpha_ca (&s, &m); - - a = m; - if (a != ~0) - { - uint64_t d = 0; - - if (a) - { - d = *(dest + i); - UN16x4_MUL_UN16x4 (d, a); - } - - *(dest + i) = d; - } - } -} - -static void -combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint32_t a = ~d >> A_SHIFT; - uint64_t s = 0; - - if (a) - { - uint64_t m = *(mask + i); - - s = *(src + i); - combine_mask_value_ca (&s, &m); - - if (a != MASK) - UN16x4_MUL_UN16 (s, a); - } - - *(dest + i) = s; - } -} - -static void -combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t a; - - combine_mask_alpha_ca (&s, &m); - - a = ~m; - if (a != ~0) - { - uint64_t d = 0; - - if (a) - { - d = *(dest + i); - UN16x4_MUL_UN16x4 (d, a); - } - - *(dest + i) = d; - } - } -} - 
-static void -combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t d = *(dest + i); - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t ad; - uint32_t as = ~d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - ad = ~m; - - UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); - - *(dest + i) = d; - } -} - -static void -combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s = *(src + i); - uint64_t m = *(mask + i); - uint64_t d = *(dest + i); - - combine_mask_value_ca (&s, &m); - - UN16x4_ADD_UN16x4 (d, s); - - *(dest + i) = d; - } -} - -static void -combine_saturate_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint32_t sa, sr, sg, sb, da; - uint32_t t, u, v; - uint64_t m, n, o, p; - - d = *(dest + i); - s = *(src + i); - m = *(mask + i); - - combine_mask_ca (&s, &m); - - sa = (m >> A_SHIFT); - sr = (m >> R_SHIFT) & MASK; - sg = (m >> G_SHIFT) & MASK; - sb = m & MASK; - da = ~d >> A_SHIFT; - - if (sb <= da) - m = ADD (s, d, 0, t); - else - m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); - - if (sg <= da) - n = ADD (s, d, G_SHIFT, t); - else - n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); - - if (sr <= da) - o = ADD (s, d, R_SHIFT, t); - else - o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); - - if (sa <= da) - p = ADD (s, d, A_SHIFT, t); - else - p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); - - *(dest + i) = m | n | o | p; - } -} - -static void -combine_disjoint_general_ca (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint64_t m, n, o, p; - uint64_t Fa, Fb; - uint32_t t, u, v; - uint64_t sa; - uint16_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_disjoint_out_part ((uint16_t) 
(sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_disjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_disjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_disjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_disjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca 
(dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_disjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -static void -combine_conjoint_general_ca (uint64_t * dest, - const uint64_t *src, - const uint64_t *mask, - int width, - uint16_t combine) -{ - int i; - - for (i = 0; i < width; ++i) - { - uint64_t s, d; - uint64_t m, n, o, p; - uint64_t Fa, Fb; - uint32_t t, u, v; - uint64_t sa; - uint16_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - combine_mask_ca (&s, &m); - - sa = m; - - switch (combine & COMBINE_A) - { - default: - Fa = 0; - break; - - case COMBINE_A_OUT: - m = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A_IN: - m = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> 0), da); - n = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m | n | o | p; - break; - - case COMBINE_A: - Fa = ~0; - break; - } - - switch (combine & COMBINE_B) - { - default: - Fb = 0; - break; - - case COMBINE_B_OUT: - m = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B_IN: - m = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> 0)); - n = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m | n | o | p; - break; - - case COMBINE_B: - Fb = ~0; - break; - } - m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); - n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); - o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); - p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); - - s = m | n | o | p; - - *(dest + i) = s; - } -} - -static void -combine_conjoint_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); -} - -static void -combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); -} - -static void -combine_conjoint_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - 
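    /* Editorial note, not part of the original source: for COMBINE_A_IN
     * (used here) the general helper above picks, per channel,
     * Fa = combine_conjoint_in_part (sa_channel, da) and Fb = 0; that is
     * Fa = MASK when sa_channel <= da, and DIV_UN16 (da, sa_channel) =
     * da * MASK / sa_channel otherwise.  E.g. sa = 0xffff, da = 0x8000
     * halves every source channel. */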
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); -} - -static void -combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); -} - -static void -combine_conjoint_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); -} - -static void -combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); -} - -static void -combine_conjoint_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); -} - -static void -combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); -} - -static void -combine_conjoint_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); -} - -void -_pixman_setup_combiner_functions_64 (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_64[PIXMAN_OP_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_DST] = combine_dst; - imp->combine_64[PIXMAN_OP_OVER] = combine_over_u; - imp->combine_64[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; - imp->combine_64[PIXMAN_OP_IN] = combine_in_u; - imp->combine_64[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; - imp->combine_64[PIXMAN_OP_OUT] = combine_out_u; - imp->combine_64[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; - imp->combine_64[PIXMAN_OP_ATOP] = combine_atop_u; - imp->combine_64[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; - imp->combine_64[PIXMAN_OP_XOR] = combine_xor_u; - imp->combine_64[PIXMAN_OP_ADD] = combine_add_u; - imp->combine_64[PIXMAN_OP_SATURATE] = combine_saturate_u; - - /* Disjoint, unified */ - imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; - imp->combine_64[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; - imp->combine_64[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u; - imp->combine_64[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; - imp->combine_64[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u; - imp->combine_64[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; - - /* Conjoint, unified */ - imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; - imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; - imp->combine_64[PIXMAN_OP_CONJOINT_DST] = combine_dst; - 
imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; - imp->combine_64[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; - imp->combine_64[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; - imp->combine_64[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u; - imp->combine_64[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; - - imp->combine_64[PIXMAN_OP_MULTIPLY] = combine_multiply_u; - imp->combine_64[PIXMAN_OP_SCREEN] = combine_screen_u; - imp->combine_64[PIXMAN_OP_OVERLAY] = combine_overlay_u; - imp->combine_64[PIXMAN_OP_DARKEN] = combine_darken_u; - imp->combine_64[PIXMAN_OP_LIGHTEN] = combine_lighten_u; - imp->combine_64[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; - imp->combine_64[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; - imp->combine_64[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; - imp->combine_64[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; - imp->combine_64[PIXMAN_OP_DIFFERENCE] = combine_difference_u; - imp->combine_64[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; - imp->combine_64[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; - imp->combine_64[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u; - imp->combine_64[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; - imp->combine_64[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u; - - /* Component alpha combiners */ - imp->combine_64_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; - imp->combine_64_ca[PIXMAN_OP_SRC] = combine_src_ca; - /* dest */ - imp->combine_64_ca[PIXMAN_OP_OVER] = combine_over_ca; - imp->combine_64_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_IN] = combine_in_ca; - imp->combine_64_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_OUT] = combine_out_ca; - imp->combine_64_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_ATOP] = combine_atop_ca; - imp->combine_64_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_XOR] = combine_xor_ca; - imp->combine_64_ca[PIXMAN_OP_ADD] = combine_add_ca; - imp->combine_64_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; - - /* Disjoint CA */ - imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; - - /* Conjoint CA */ - imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca; - 
imp->combine_64_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; - imp->combine_64_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; - - imp->combine_64_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; - imp->combine_64_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; - imp->combine_64_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; - imp->combine_64_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; - imp->combine_64_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; - imp->combine_64_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; - imp->combine_64_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; - imp->combine_64_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; - imp->combine_64_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; - imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; - imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; - - /* It is not clear that these make sense, so make them noops for now */ - imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = combine_dst; - imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst; -} - diff --git a/programs/develop/libraries/pixman/pixman-combine64.h b/programs/develop/libraries/pixman/pixman-combine64.h deleted file mode 100644 index ed9ffebf6c..0000000000 --- a/programs/develop/libraries/pixman/pixman-combine64.h +++ /dev/null @@ -1,230 +0,0 @@ -/* WARNING: This file is generated by combine.pl from combine.inc. - Please edit one of those files rather than this one. */ - -#line 1 "pixman-combine.c.template" - -#define COMPONENT_SIZE 16 -#define MASK 0xffffULL -#define ONE_HALF 0x8000ULL - -#define A_SHIFT 16 * 3 -#define R_SHIFT 16 * 2 -#define G_SHIFT 16 -#define A_MASK 0xffff000000000000ULL -#define R_MASK 0xffff00000000ULL -#define G_MASK 0xffff0000ULL - -#define RB_MASK 0xffff0000ffffULL -#define AG_MASK 0xffff0000ffff0000ULL -#define RB_ONE_HALF 0x800000008000ULL -#define RB_MASK_PLUS_ONE 0x10000000010000ULL - -#define ALPHA_16(x) ((x) >> A_SHIFT) -#define RED_16(x) (((x) >> R_SHIFT) & MASK) -#define GREEN_16(x) (((x) >> G_SHIFT) & MASK) -#define BLUE_16(x) ((x) & MASK) - -/* - * Helper macros. - */ - -#define MUL_UN16(a, b, t) \ - ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) - -#define DIV_UN16(a, b) \ - (((uint32_t) (a) * MASK) / (b)) - -#define ADD_UN16(x, y, t) \ - ((t) = (x) + (y), \ - (uint64_t) (uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) - -#define DIV_ONE_UN16(x) \ - (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) - -/* - * The methods below use some tricks to be able to do two color - * components at the same time. 
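 * (Editorial illustration, not part of the original comment: with
 * G_SHIFT = 16, blue occupies bits 0-15 and red bits 32-47 of a 64-bit
 * pixel, so x & RB_MASK holds both channels at once.  A single 64-bit
 * multiply by a 16-bit alpha then scales both in one operation, because
 * each 32-bit lane has enough headroom for a 16x16-bit product;
 * UN16_rb_MUL_UN16 below is MUL_UN16 applied to the two lanes in
 * parallel.)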
- */ - -/* - * x_rb = (x_rb * a) / 255 - */ -#define UN16_rb_MUL_UN16(x, a, t) \ - do \ - { \ - t = ((x) & RB_MASK) * (a); \ - t += RB_ONE_HALF; \ - x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x &= RB_MASK; \ - } while (0) - -/* - * x_rb = min (x_rb + y_rb, 255) - */ -#define UN16_rb_ADD_UN16_rb(x, y, t) \ - do \ - { \ - t = ((x) + (y)); \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - x = (t & RB_MASK); \ - } while (0) - -/* - * x_rb = (x_rb * a_rb) / 255 - */ -#define UN16_rb_MUL_UN16_rb(x, a, t) \ - do \ - { \ - t = (x & MASK) * (a & MASK); \ - t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - t += RB_ONE_HALF; \ - t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - x = t & RB_MASK; \ - } while (0) - -/* - * x_c = (x_c * a) / 255 - */ -#define UN16x4_MUL_UN16(x, a) \ - do \ - { \ - uint64_t r1__, r2__, t__; \ - \ - r1__ = (x); \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a) / 255 + y_c - */ -#define UN16x4_MUL_UN16_ADD_UN16x4(x, a, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y) & RB_MASK; \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a + y_c * b) / 255 - */ -#define UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16(x, a, y, b) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (y); \ - UN16_rb_MUL_UN16 (r1__, (a), t__); \ - UN16_rb_MUL_UN16 (r2__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((y) >> G_SHIFT); \ - UN16_rb_MUL_UN16 (r2__, (a), t__); \ - UN16_rb_MUL_UN16 (r3__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 - */ -#define UN16x4_MUL_UN16x4(x, a) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c) / 255 + y_c - */ -#define UN16x4_MUL_UN16x4_ADD_UN16x4(x, a, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - r2__ = (y) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT); \ - r3__ = ((a) >> G_SHIFT); \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - (x) = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - * x_c = (x_c * a_c + y_c * b) / 255 - */ -#define UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16(x, a, y, b) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x); \ - r2__ = (a); \ - UN16_rb_MUL_UN16_rb (r1__, r2__, t__); \ - r2__ = (y); \ - UN16_rb_MUL_UN16 (r2__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = (x) >> G_SHIFT; \ - r3__ = (a) >> G_SHIFT; \ - UN16_rb_MUL_UN16_rb (r2__, r3__, t__); \ - r3__ = (y) >> G_SHIFT; \ - UN16_rb_MUL_UN16 (r3__, (b), t__); \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) - -/* - x_c = 
min(x_c + y_c, 255) -*/ -#define UN16x4_ADD_UN16x4(x, y) \ - do \ - { \ - uint64_t r1__, r2__, r3__, t__; \ - \ - r1__ = (x) & RB_MASK; \ - r2__ = (y) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r1__, r2__, t__); \ - \ - r2__ = ((x) >> G_SHIFT) & RB_MASK; \ - r3__ = ((y) >> G_SHIFT) & RB_MASK; \ - UN16_rb_ADD_UN16_rb (r2__, r3__, t__); \ - \ - x = r1__ | (r2__ << G_SHIFT); \ - } while (0) diff --git a/programs/develop/libraries/pixman/pixman-compiler.h b/programs/develop/libraries/pixman/pixman-compiler.h index 0e08589c83..9b190b422f 100644 --- a/programs/develop/libraries/pixman/pixman-compiler.h +++ b/programs/develop/libraries/pixman/pixman-compiler.h @@ -18,6 +18,18 @@ # define FUNC ((const char*) ("???")) #endif +#if defined (__GNUC__) +# define unlikely(expr) __builtin_expect ((expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +#if defined (__GNUC__) +# define MAYBE_UNUSED __attribute__((unused)) +#else +# define MAYBE_UNUSED +#endif + #ifndef INT16_MIN # define INT16_MIN (-32767-1) #endif @@ -42,6 +54,19 @@ # define UINT32_MAX (4294967295U) #endif +#ifndef INT64_MIN +# define INT64_MIN (-9223372036854775807-1) +#endif + +#ifndef INT64_MAX +# define INT64_MAX (9223372036854775807) +#endif + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t)-1) +#endif + + #ifndef M_PI # define M_PI 3.14159265358979323846 #endif @@ -74,6 +99,10 @@ # define PIXMAN_EXPORT #endif +/* member offsets */ +#define CONTAINER_OF(type, member, data) \ + ((type *)(((uint8_t *)data) - offsetof (type, member))) + /* TLS */ #if defined(PIXMAN_NO_TLS) @@ -82,10 +111,10 @@ # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) -#elif defined(TOOLCHAIN_SUPPORTS__THREAD) +#elif defined(TLS) # define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static __thread type name + static TLS type name # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) @@ -191,8 +220,7 @@ value = tls_ ## name ## _alloc (); \ } \ return value; \ - } \ - extern int no_such_variable + } # define PIXMAN_GET_THREAD_LOCAL(name) \ tls_ ## name ## _get () diff --git a/programs/develop/libraries/pixman/pixman-conical-gradient.c b/programs/develop/libraries/pixman/pixman-conical-gradient.c index 897948be4f..8bb46aecdc 100644 --- a/programs/develop/libraries/pixman/pixman-conical-gradient.c +++ b/programs/develop/libraries/pixman/pixman-conical-gradient.c @@ -50,16 +50,16 @@ coordinates_to_parameter (double x, double y, double angle) */ } -static void -conical_gradient_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static uint32_t * +conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { - source_image_t *source = (source_image_t *)image; - gradient_t *gradient = (gradient_t *)source; + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t *buffer = iter->buffer; + + gradient_t *gradient = (gradient_t *)image; conical_gradient_t *conical = (conical_gradient_t *)image; uint32_t *end = buffer + width; pixman_gradient_walker_t walker; @@ -71,9 +71,9 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, double ry = y + 0.5; double rz = 1.; - _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - if (source->common.transform) + if (image->common.transform) { pixman_vector_t v; @@ -82,19 +82,19 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = 
pixman_fixed_1; - if (!pixman_transform_point_3d (source->common.transform, &v)) - return; + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; - cx = source->common.transform->matrix[0][0] / 65536.; - cy = source->common.transform->matrix[1][0] / 65536.; - cz = source->common.transform->matrix[2][0] / 65536.; + cx = image->common.transform->matrix[0][0] / 65536.; + cy = image->common.transform->matrix[1][0] / 65536.; + cz = image->common.transform->matrix[2][0] / 65536.; rx = v.vector[0] / 65536.; ry = v.vector[1] / 65536.; rz = v.vector[2] / 65536.; affine = - source->common.transform->matrix[2][0] == 0 && + image->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1; } @@ -155,17 +155,33 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, rz += cz; } } + + iter->y++; + return iter->buffer; } -static void -conical_gradient_property_changed (pixman_image_t *image) +static uint32_t * +conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { - image->common.get_scanline_32 = conical_gradient_get_scanline_32; - image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; + uint32_t *buffer = conical_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return buffer; +} + +void +_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = conical_get_scanline_narrow; + else + iter->get_scanline = conical_get_scanline_wide; } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_conical_gradient (pixman_point_fixed_t * center, +pixman_image_create_conical_gradient (const pixman_point_fixed_t * center, pixman_fixed_t angle, const pixman_gradient_stop_t *stops, int n_stops) @@ -191,8 +207,6 @@ pixman_image_create_conical_gradient (pixman_point_fixed_t * center, conical->center = *center; conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI; - image->common.property_changed = conical_gradient_property_changed; - return image; } diff --git a/programs/develop/libraries/pixman/pixman-cpu.c b/programs/develop/libraries/pixman/pixman-cpu.c deleted file mode 100644 index e4fb1e4097..0000000000 --- a/programs/develop/libraries/pixman/pixman-cpu.c +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> - -#if defined(USE_ARM_SIMD) && defined(_MSC_VER) -/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ -#include <windows.h> -#endif - -#include "pixman-private.h" - -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * "-maltivec -mabi=altivec", as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. - */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - -#ifdef __APPLE__ -#include <sys/sysctl.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - size_t length = sizeof(have_vmx); - int error = - sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); - - if (error) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__OpenBSD__) -#include <sys/param.h> -#include <sys/sysctl.h> -#include <machine/cpu.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, &have_vmx, &length, NULL, 0); - - if (error != 0) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__linux__) -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdio.h> -#include <linux/auxvec.h> -#include <asm/cputable.h> - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; - - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); - - fd = open (fname, O_RDONLY); - if (fd >= 0) - { - for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) - { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count <= 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } - } - close (fd); - } - } - if (!initialized) - { - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. 
*/ - have_vmx = FALSE; - initialized = TRUE; - } - - return have_vmx; -} - -#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ -#include <signal.h> -#include <setjmp.h> - -static jmp_buf jump_env; - -static void -vmx_test (int sig, - siginfo_t *si, - void * unused) -{ - longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ - struct sigaction sa, osa; - int jmp_result; - - if (!initialized) - { - sa.sa_flags = SA_SIGINFO; - sigemptyset (&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, &sa, &osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( "vor 0, 0, 0" ); - } - sigaction (SIGILL, &osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; - } - return have_vmx; -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) - -#if defined(_MSC_VER) - -#if defined(USE_ARM_SIMD) -extern int pixman_msvc_try_arm_simd_op (); - -pixman_bool_t -pixman_have_arm_simd (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_simd = FALSE; - - if (!initialized) - { - __try { - pixman_msvc_try_arm_simd_op (); - have_arm_simd = TRUE; - } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } - initialized = TRUE; - } - - return have_arm_simd; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -extern int pixman_msvc_try_arm_neon_op (); - -pixman_bool_t -pixman_have_arm_neon (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_neon = FALSE; - - if (!initialized) - { - __try - { - pixman_msvc_try_arm_neon_op (); - have_arm_neon = TRUE; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - have_arm_neon = FALSE; - } - initialized = TRUE; - } - - return have_arm_neon; -} - -#endif /* USE_ARM_NEON */ - -#else /* linux ELF */ - -#include <stdlib.h> -#include <unistd.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/mman.h> -#include <fcntl.h> -#include <string.h> -#include <elf.h> - -static pixman_bool_t arm_has_v7 = FALSE; -static pixman_bool_t arm_has_v6 = FALSE; -static pixman_bool_t arm_has_vfp = FALSE; -static pixman_bool_t arm_has_neon = FALSE; -static pixman_bool_t arm_has_iwmmxt = FALSE; -static pixman_bool_t arm_tests_initialized = FALSE; - -static void -pixman_arm_read_auxv () -{ - int fd; - Elf32_auxv_t aux; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) - { - if (aux.a_type == AT_HWCAP) - { - uint32_t hwcap = aux.a_un.a_val; - /* hardcode these values to avoid depending on specific - * versions of the hwcap header, e.g. 
HWCAP_NEON - */ - arm_has_vfp = (hwcap & 64) != 0; - arm_has_iwmmxt = (hwcap & 512) != 0; - /* this flag is only present on kernel 2.6.29 */ - arm_has_neon = (hwcap & 4096) != 0; - } - else if (aux.a_type == AT_PLATFORM) - { - const char *plat = (const char*) aux.a_un.a_val; - if (strncmp (plat, "v7l", 3) == 0) - { - arm_has_v7 = TRUE; - arm_has_v6 = TRUE; - } - else if (strncmp (plat, "v6l", 3) == 0) - { - arm_has_v6 = TRUE; - } - } - } - close (fd); - } - - arm_tests_initialized = TRUE; -} - -#if defined(USE_ARM_SIMD) -pixman_bool_t -pixman_have_arm_simd (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv (); - - return arm_has_v6; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -pixman_bool_t -pixman_have_arm_neon (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv (); - - return arm_has_neon; -} - -#endif /* USE_ARM_NEON */ - -#endif /* linux */ - -#endif /* USE_ARM_SIMD || USE_ARM_NEON */ - -#if defined(USE_MMX) || defined(USE_SSE2) -/* The CPU detection code needs to be in a file not compiled with - * "-mmmx -msse", as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. - */ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include <sys/auxv.h> -#endif - -typedef enum -{ - NO_FEATURES = 0, - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -} cpu_features_t; - - -static unsigned int -detect_cpu_features (void) -{ - unsigned int features = 0; - unsigned int result = 0; - -#ifdef HAVE_GETISAX - if (getisax (&result, 1)) - { - if (result & AV_386_CMOV) - features |= CMOV; - if (result & AV_386_MMX) - features |= MMX; - if (result & AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; - if (result & AV_386_SSE) - features |= SSE; - if (result & AV_386_SSE2) - features |= SSE2; - } -#else - char vendor[13]; -#ifdef _MSC_VER - int vendor0 = 0, vendor1, vendor2; -#endif - vendor[0] = 0; - vendor[12] = 0; - -#ifdef __GNUC__ - /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. 
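 * (Editorial aside, not in the original comment: the EDX value saved into
 * 'result' below holds the standard CPUID leaf-1 feature flags, which is
 * why the code tests bit 15 for CMOV, bit 23 for MMX, bit 25 for SSE and
 * bit 26 for SSE2; the AMD extended leaf 0x80000001 reports the MMX
 * extensions in bit 22.)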
- */ - __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - -#elif defined (_MSC_VER) - - _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx - nocpuid: - mov result, edx - } - memmove (vendor + 0, &vendor0, 4); - memmove (vendor + 4, &vendor1, 4); - memmove (vendor + 8, &vendor2, 4); - -#else -# error unsupported compiler -#endif - - features = 0; - if (result) - { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "Geode by NSC") == 0)) - { - /* check for AMD MMX extensions */ -#ifdef __GNUC__ - __asm__ ( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: - pop ebx - mov result, edx - } -#endif - if (result & (1 << 22)) - features |= MMX_EXTENSIONS; - } - } -#endif /* HAVE_GETISAX */ - - return features; -} - -static pixman_bool_t -pixman_have_mmx (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t mmx_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); - initialized = TRUE; - } - - return mmx_present; -} - -#ifdef USE_SSE2 -static pixman_bool_t -pixman_have_sse2 (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t sse2_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); - initialized = TRUE; - } - - return sse2_present; -} - -#endif - -#else /* __amd64__ */ -#ifdef USE_MMX -#define pixman_have_mmx() TRUE -#endif -#ifdef USE_SSE2 -#define pixman_have_sse2() TRUE -#endif -#endif /* __amd64__ */ -#endif - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ -#ifdef USE_SSE2 - if (pixman_have_sse2 ()) - return _pixman_implementation_create_sse2 (); -#endif -#ifdef USE_MMX - if (pixman_have_mmx ()) - return _pixman_implementation_create_mmx (); -#endif - -#ifdef USE_ARM_NEON - if (pixman_have_arm_neon ()) - return _pixman_implementation_create_arm_neon (); -#endif -#ifdef USE_ARM_SIMD - if (pixman_have_arm_simd ()) - return _pixman_implementation_create_arm_simd (); -#endif 
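/* Editorial note, not part of the original source: each
 * _pixman_implementation_create_* () constructor links the new
 * implementation to a slower fallback, so on an SSE2-capable x86 the
 * result is roughly the delegate chain
 *     sse2 -> mmx -> fast_path -> general
 * and any operation the top level lacks a fast path for falls through
 * to the next implementation. */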
-#ifdef USE_VMX - if (pixman_have_vmx ()) - return _pixman_implementation_create_vmx (); -#endif - - return _pixman_implementation_create_fast_path (); -} - diff --git a/programs/develop/libraries/pixman/pixman-edge.c b/programs/develop/libraries/pixman/pixman-edge.c index 8d498ab445..ad6dfc4cfa 100644 --- a/programs/develop/libraries/pixman/pixman-edge.c +++ b/programs/develop/libraries/pixman/pixman-edge.c @@ -374,6 +374,7 @@ pixman_rasterize_edges (pixman_image_t *image, pixman_fixed_t b) { return_if_fail (image->type == BITS); + return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A); if (image->bits.read_func || image->bits.write_func) pixman_rasterize_edges_accessors (image, l, r, t, b); diff --git a/programs/develop/libraries/pixman/pixman-fast-path.c b/programs/develop/libraries/pixman/pixman-fast-path.c index 5d5fa956c8..247aea6450 100644 --- a/programs/develop/libraries/pixman/pixman-fast-path.c +++ b/programs/develop/libraries/pixman/pixman-fast-path.c @@ -30,12 +30,12 @@ #include <string.h> #include "pixman-private.h" #include "pixman-combine32.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" static force_inline uint32_t fetch_24 (uint8_t *a) { - if (((unsigned long)a) & 1) + if (((uintptr_t)a) & 1) { #ifdef WORDS_BIGENDIAN return (*a << 16) | (*(uint16_t *)(a + 1)); @@ -57,7 +57,7 @@ static force_inline void store_24 (uint8_t *a, uint32_t v) { - if (((unsigned long)a) & 1) + if (((uintptr_t)a) & 1) { #ifdef WORDS_BIGENDIAN *a = (uint8_t) (v >> 16); @@ -90,7 +90,7 @@ over (uint32_t src, return dest; } -static uint32_t +static force_inline uint32_t in (uint32_t x, uint8_t y) { @@ -108,19 +108,9 @@ */ static void fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *src, *src_line; uint32_t *dst, *dst_line; uint8_t *mask, *mask_line; @@ -129,7 +119,7 @@ fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, uint32_t s, d; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); @@ -168,19 +158,9 @@ static void fast_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint8_t *dst_line, *dst; uint8_t *mask_line, *mask, m; @@ -188,7 +168,7 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp, int32_t w; uint16_t t; - src = _pixman_image_get_solid (src_image, dest_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; @@ -246,19 +226,9 @@ static void fast_composite_in_8_8 (pixman_implementation_t *imp, - 
pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; @@ -293,32 +263,22 @@ fast_composite_in_8_8 (pixman_implementation_t *imp, static void fast_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst_line, *dst, d; uint8_t *mask_line, *mask, m; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) @@ -351,32 +311,21 @@ fast_composite_over_n_8_8888 (pixman_implementation_t *imp, static void fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - uint32_t src, srca, s; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, s; uint32_t *dst_line, *dst, d; uint32_t *mask_line, *mask, ma; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) @@ -408,32 +357,22 @@ fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, static void fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca, s; uint32_t *dst_line, *dst, d; uint32_t *mask_line, *mask, ma; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, 
uint32_t, mask_stride, mask_line, 1); while (height--) @@ -474,19 +413,9 @@ fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, static void fast_composite_over_n_8_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint8_t *dst_line, *dst; uint32_t d; @@ -494,13 +423,13 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp, int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) @@ -539,19 +468,9 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp, static void fast_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint16_t *dst_line, *dst; uint32_t d; @@ -559,13 +478,13 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp, int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) @@ -588,15 +507,15 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp, else { d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); + d = over (src, convert_0565_to_0888 (d)); } - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } else if (m) { d = *dst; - d = over (in (src, m), CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (in (src, m), convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); } dst++; } @@ -605,19 +524,9 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp, static void fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca, s; uint16_t src16; uint16_t *dst_line, *dst; @@ -626,15 +535,15 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, 
dest_image->bits.format); srca = src >> 24; if (src == 0) return; - src16 = CONVERT_8888_TO_0565 (src); + src16 = convert_8888_to_0565 (src); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) @@ -657,14 +566,14 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, else { d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (src, convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); } } else if (ma) { d = *dst; - d = CONVERT_0565_TO_0888 (d); + d = convert_0565_to_0888 (d); s = src; @@ -673,7 +582,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ma = ~ma; UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } dst++; } @@ -682,26 +591,16 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, static void fast_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src, s; int dst_stride, src_stride; uint8_t a; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) @@ -727,25 +626,15 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp, static void fast_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) @@ -764,19 +653,9 @@ fast_composite_src_x888_8888 (pixman_implementation_t *imp, #if 0 static void fast_composite_over_8888_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint32_t d; uint32_t *src_line, *src, s; @@ -784,7 +663,7 @@ fast_composite_over_8888_0888 (pixman_implementation_t *imp, int dst_stride, src_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); 
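/* Editorial note, not part of the original source: PIXMAN_IMAGE_GET_LINE
 * expands to roughly
 *     stride = image->bits.rowstride * sizeof (uint32_t) / sizeof (type);
 *     line   = (type *) image->bits.bits + stride * y + mul * x;
 * i.e. it yields a typed pointer to pixel (x, y) plus the row stride in
 * units of 'type'; the 'mul' argument (3 in the call above) covers 24bpp,
 * where one pixel spans three uint8_t units. */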
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) @@ -816,19 +695,9 @@ fast_composite_over_8888_0888 (pixman_implementation_t *imp, static void fast_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint32_t d; uint32_t *src_line, *src, s; @@ -837,7 +706,7 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, int32_t w; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { @@ -860,70 +729,20 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, else { d = *dst; - d = over (s, CONVERT_0565_TO_0888 (d)); + d = over (s, convert_0565_to_0888 (d)); } - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } dst++; } } } -static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = CONVERT_8888_TO_0565 (s); - dst++; - } - } -} - static void fast_composite_add_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; @@ -932,7 +751,7 @@ fast_composite_add_8_8 (pixman_implementation_t *imp, uint16_t t; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { @@ -961,20 +780,52 @@ fast_composite_add_8_8 (pixman_implementation_t *imp, } static void -fast_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint16_t *src_line, *src; + uint32_t s; + int 
dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if (d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + } + } +} + +static void +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; @@ -982,7 +833,7 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp, uint32_t s, d; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { @@ -1012,19 +863,9 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp, static void fast_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; @@ -1032,9 +873,9 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp, uint32_t src; uint8_t sa; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = (src >> 24); while (height--) @@ -1077,20 +918,10 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp, do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); static void -fast_composite_add_1000_1000 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_1_1 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; @@ -1098,7 +929,7 @@ fast_composite_add_1000_1000 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) @@ -1123,19 +954,9 @@ fast_composite_add_1000_1000 (pixman_implementation_t *imp, static void fast_composite_over_n_1_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t 
mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst, *dst_line; uint32_t *mask, *mask_line; @@ -1146,12 +967,12 @@ fast_composite_over_n_1_8888 (pixman_implementation_t *imp, if (width <= 0) return; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, mask_stride, mask_line, 1); @@ -1215,19 +1036,9 @@ fast_composite_over_n_1_8888 (pixman_implementation_t *imp, static void fast_composite_over_n_1_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint16_t *dst, *dst_line; uint32_t *mask, *mask_line; @@ -1240,12 +1051,12 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, if (width <= 0) return; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, mask_stride, mask_line, 1); @@ -1253,7 +1064,7 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, if (srca == 0xff) { - src565 = CONVERT_8888_TO_0565 (src); + src565 = convert_8888_to_0565 (src); while (height--) { dst = dst_line; @@ -1301,8 +1112,8 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, } if (bitcache & bitmask) { - d = over (src, CONVERT_0565_TO_0888 (*dst)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (src, convert_0565_to_0888 (*dst)); + *dst = convert_8888_to_0565 (d); } bitmask = UPDATE_BITMASK (bitmask); dst++; @@ -1317,35 +1128,29 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, static void fast_composite_solid_fill (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src; - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - if (dst_image->bits.format == PIXMAN_a8) + if (dest_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dest_image->bits.format == PIXMAN_a8) { src = src >> 24; } - else if (dst_image->bits.format == PIXMAN_r5g6b5 || - dst_image->bits.format == PIXMAN_b5g6r5) + else if (dest_image->bits.format == PIXMAN_r5g6b5 || + dest_image->bits.format == PIXMAN_b5g6r5) { - src = CONVERT_8888_TO_0565 (src); + src = convert_8888_to_0565 (src); } - pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP 
(dst_image->bits.format), + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), dest_x, dest_y, width, height, src); @@ -1353,30 +1158,20 @@ fast_composite_solid_fill (pixman_implementation_t *imp, static void fast_composite_src_memcpy (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; + PIXMAN_COMPOSITE_ARGS (info); + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; uint32_t n_bytes = width * bpp; int dst_stride, src_stride; uint8_t *dst; uint8_t *src; src_stride = src_image->bits.rowstride * 4; - dst_stride = dst_image->bits.rowstride * 4; + dst_stride = dest_image->bits.rowstride * 4; src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; - dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; while (height--) { @@ -1387,43 +1182,211 @@ fast_composite_src_memcpy (pixman_implementation_t *imp, } } -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER); -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE); -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD); -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL); -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER); -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE); -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD); -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL); -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER); -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE); -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD); -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL); -FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL); -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER); -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE); -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD); -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL); +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 
8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +#define REPEAT_MIN_WIDTH 32 + +static void +fast_composite_tiled_repeat (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + pixman_composite_func_t func; + pixman_format_code_t mask_format; + uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; + + src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + + if (mask_image) + { + mask_format = mask_image->common.extended_format_code; + mask_flags = info->mask_flags; + } + else + { + mask_format = PIXMAN_null; + mask_flags = FAST_PATH_IS_OPAQUE; + } + + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) + { + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; + + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; + + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); + + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } + + sx = src_x; + sy = src_y; + + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); + + if (need_src_extension) + { + if (src_bpp == 32) + { + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; + } + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; + } + } + else if (src_bpp == 8) + { + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + 
((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } + } + + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } + + width_remain = width; + + while (width_remain > 0) + { + num_pixels = src_width - sx; + + if (num_pixels > width_remain) + num_pixels = width_remain; + + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; + + func (imp, &info2); + + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; + } + + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; + } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); +} /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ static force_inline void -scaled_nearest_scanline_565_565_SRC (uint16_t * dst, - uint16_t * src, - int32_t w, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx) +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + const uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) { uint16_t tmp1, tmp2, tmp3, tmp4; while ((w -= 4) >= 0) { - tmp1 = src[pixman_fixed_to_int (vx)]; + tmp1 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; - tmp2 = src[pixman_fixed_to_int (vx)]; + tmp2 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; - tmp3 = src[pixman_fixed_to_int (vx)]; + tmp3 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; - tmp4 = src[pixman_fixed_to_int (vx)]; + tmp4 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; *dst++ = tmp1; *dst++ = tmp2; @@ -1432,26 +1395,26 @@ scaled_nearest_scanline_565_565_SRC (uint16_t * dst, } if (w & 2) { - tmp1 = src[pixman_fixed_to_int (vx)]; + tmp1 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; - tmp2 = src[pixman_fixed_to_int (vx)]; + tmp2 = *(src + pixman_fixed_to_int (vx)); vx += unit_x; *dst++ = tmp1; *dst++ = tmp2; } if (w & 1) - *dst++ = src[pixman_fixed_to_int (vx)]; + *dst = *(src + pixman_fixed_to_int (vx)); } FAST_NEAREST_MAINLOOP (565_565_cover_SRC, scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, COVER); + uint16_t, uint16_t, COVER) FAST_NEAREST_MAINLOOP (565_565_none_SRC, scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, NONE); + uint16_t, uint16_t, NONE) FAST_NEAREST_MAINLOOP (565_565_pad_SRC, scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, PAD); + uint16_t, uint16_t, PAD) static force_inline uint32_t fetch_nearest (pixman_repeat_t src_repeat, @@ -1460,7 +1423,7 @@ fetch_nearest (pixman_repeat_t src_repeat, { if (repeat (src_repeat, &x, src_width)) { - if (format == PIXMAN_x8r8g8b8) + if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8) return *(src + x) | 0xff000000; else return *(src + x); @@ -1493,19 +1456,9 @@ combine_src (uint32_t s, uint32_t *dst) static void fast_composite_scaled_nearest (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line; uint32_t *src_line; int dst_stride, src_stride; @@ -1516,7 +1469,7 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp, pixman_vector_t v; pixman_fixed_t vy; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, 
dest_y, uint32_t, dst_stride, dst_line, 1); /* pass in 0 instead of src_x and src_y because src_x and src_y need to be * transformed from destination space to source space */ @@ -1613,6 +1566,252 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp, } } +#define CACHE_LINE_SIZE 64 + +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ + \ +static void \ +blt_rotated_90_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + (h - y - 1); \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s += src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_270_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + src_stride * (w - 1) + y; \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s -= src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_90_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src, \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + src += leading_pixels * src_stride; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * x, \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src + W * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +blt_rotated_270_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if 
(leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src + src_stride * (W - leading_pixels), \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + src += trailing_pixels * src_stride; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * (W - x - TILE_SIZE), \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src - trailing_pixels * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = -src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ + src_y_t = src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} \ + \ +static void \ +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + src_y_t = -src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} + +FAST_SIMPLE_ROTATE (8, uint8_t) +FAST_SIMPLE_ROTATE (565, uint16_t) +FAST_SIMPLE_ROTATE (8888, uint32_t) + static const pixman_fast_path_t c_fast_paths[] = { PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), @@ -1645,16 +1844,19 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565), PIXMAN_STD_FAST_PATH (ADD, 
a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), @@ -1675,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -1695,6 +1893,13 @@ static const pixman_fast_path_t c_fast_paths[] = SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), @@ -1730,9 +1935,121 @@ static const pixman_fast_path_t c_fast_paths[] = NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), +#define SIMPLE_ROTATE_FLAGS(angle) \ + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \ + FAST_PATH_STANDARD_FLAGS) + +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_90_##suffix, \ + }, \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_270_##suffix, \ + } + + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH 
(SRC, x8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + + /* Simple repeat fast path entry. */ + { PIXMAN_OP_any, + PIXMAN_any, + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | + FAST_PATH_NORMAL_REPEAT), + PIXMAN_any, 0, + PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, + fast_composite_tiled_repeat + }, + { PIXMAN_OP_NONE }, }; +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + if (filler & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } +} + static void pixman_fill8 (uint32_t *bits, int stride, @@ -1740,11 +2057,11 @@ pixman_fill8 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int byte_stride = stride * (int) sizeof (uint32_t); uint8_t *dst = (uint8_t *) bits; - uint8_t v = xor & 0xff; + uint8_t v = filler & 0xff; int i; dst = dst + y * byte_stride + x; @@ -1765,12 +2082,12 @@ pixman_fill16 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); uint16_t *dst = (uint16_t *)bits; - uint16_t v = xor & 0xffff; + uint16_t v = filler & 0xffff; int i; dst = dst + y * short_stride + x; @@ -1791,7 +2108,7 @@ pixman_fill32 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int i; @@ -1800,7 +2117,7 @@ pixman_fill32 (uint32_t *bits, while (height--) { for (i = 0; i < width; ++i) - bits[i] = xor; + bits[i] = filler; bits += stride; } @@ -1815,38 +2132,227 @@ fast_path_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { switch (bpp) { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, filler); + break; + case 8: - pixman_fill8 (bits, stride, x, y, width, height, xor); + pixman_fill8 (bits, stride, x, y, width, height, filler); break; case 16: - pixman_fill16 (bits, stride, x, y, width, height, xor); + pixman_fill16 (bits, stride, x, y, width, height, filler); break; case 32: - pixman_fill32 (bits, stride, x, y, width, height, xor); + pixman_fill32 (bits, stride, x, y, width, height, filler); break; default: - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); - break; + return FALSE; } return TRUE; } -pixman_implementation_t * -_pixman_implementation_create_fast_path (void) 
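To make the new a1 fill path above concrete, here is a minimal standalone sketch (not part of the patch) of how the little-endian A1_FILL_MASK variant carves an n-pixel run out of a single 32-bit scanline word, mirroring what pixman_fill1_line does for partial words:

/* Demo of the non-WORDS_BIGENDIAN case: pixel k of a word is bit k. */
#include <stdio.h>
#include <stdint.h>

#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))

int
main (void)
{
    uint32_t word = 0;

    word |= A1_FILL_MASK (5, 3);         /* set pixels 3..7 */
    printf ("%08x\n", (unsigned) word);  /* prints 000000f8 */

    word &= ~A1_FILL_MASK (2, 4);        /* clear pixels 4..5 */
    printf ("%08x\n", (unsigned) word);  /* prints 000000c8 */

    return 0;
}

Note that (1U << (n)) - 1 requires n < 32; pixman_fill1_line never hits n == 32 because whole words go through the plain 0xFFFFFFFF / 0 store loop.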
+/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) { - pixman_implementation_t *general = _pixman_implementation_create_general (); - pixman_implementation_t *imp = _pixman_implementation_create (general, c_fast_paths); + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration */ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { + *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t 
*)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = fast_dest_fetch_noop; + } + else + { + iter->get_scanline = f->get_scanline; + } + iter->write_back = f->write_back; + return TRUE; + } + } + } + return FALSE; +} + + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; + imp->src_iter_init = fast_src_iter_init; + imp->dest_iter_init = fast_dest_iter_init; return imp; } diff --git a/programs/develop/libraries/pixman/pixman-fast-path.h b/programs/develop/libraries/pixman/pixman-fast-path.h deleted file mode 100644 index 98ef81ea7d..0000000000 --- a/programs/develop/libraries/pixman/pixman-fast-path.h +++ /dev/null @@ -1,451 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. 
- */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (*c < 0 || *c >= size) - return FALSE; - } - else if (repeat == PIXMAN_REPEAT_NORMAL) - { - while (*c >= size) - *c -= size; - while (*c < 0) - *c += size; - } - else if (repeat == PIXMAN_REPEAT_PAD) - { - *c = CLIP (*c, 0, size - 1); - } - else /* REFLECT */ - { - *c = MOD (*c, size * 2); - if (*c >= size) - *c = size * 2 - *c - 1; - } - return TRUE; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - * is probably excessive in many cases. This particular function - * may need its own correctness test and performance tuning. - */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * width, - int32_t * left_pad, - int32_t * right_pad) -{ - int64_t max_vx = (int64_t) source_image_width << 16; - int64_t tmp; - if (vx < 0) - { - tmp = ((int64_t) unit_x - 1 - vx) / unit_x; - if (tmp > *width) - { - *left_pad = *width; - *width = 0; - } - else - { - *left_pad = (int32_t) tmp; - *width -= (int32_t) tmp; - } - } - else - { - *left_pad = 0; - } - tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; - if (tmp < 0) - { - *right_pad = *width; - *width = 0; - } - else if (tmp >= *width) - { - *right_pad = 0; - } - else - { - *right_pad = *width - (int32_t) tmp; - *width = (int32_t) tmp; - } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with - 565 source, but it is needed to build. 
*/ -#define GET_0565_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static force_inline void \ -scanline_func_name (dst_type_t *dst, \ - src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx) \ -{ \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int x1, x2; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - while ((w -= 2) >= 0) \ - { \ - x1 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s1 = src[x1]; \ - \ - x2 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s2 = src[x2]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - \ - if (a2 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - else if (s2) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ - a2 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - } \ - \ - if (w & 1) \ - { \ - x1 = vx >> 16; \ - s1 = src[x1]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - } \ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ -static void \ -fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dst_x, \ - int32_t dst_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type_t *dst_line; \ - src_type_t *src_first_line; \ - int y; \ - pixman_fixed_t max_vx = max_vx; /* suppress uninitialized variable warning */ \ - pixman_fixed_t max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, right_pad; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - int src_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, 
dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* Clamp repeating positions inside the actual samples */ \ - max_vx = src_image->bits.width << 16; \ - max_vy = src_image->bits.height << 16; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ - &width, &left_pad, &right_pad); \ - vx += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - \ - y = vy >> 16; \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (dst, src, left_pad, 0, 0, 0); \ - } \ - if (width > 0) \ - { \ - scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \ - right_pad, 0, 0, 0); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - static src_type_t zero = 0; \ - if (y < 0 || y >= src_image->bits.height) \ - { \ - scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \ - continue; \ - } \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (dst, &zero, left_pad, 0, 0, 0); \ - } \ - if (width > 0) \ - { \ - scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \ - } \ - } \ - else \ - { \ - src = src_first_line + src_stride * y; \ - scanline_func (dst, src, width, vx, unit_x, max_vx); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ - FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ - OP, repeat_mode) \ - FAST_NEAREST_MAINLOOP_INT(_ ## 
scale_func_name ## _ ## OP, \ - scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - src_type_t, dst_type_t, repeat_mode) \ - \ - extern int no_such_variable - - -#define SCALED_NEAREST_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#endif diff --git a/programs/develop/libraries/pixman/pixman-filter.c b/programs/develop/libraries/pixman/pixman-filter.c new file mode 100644 index 0000000000..5ff7b6eaad --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-filter.c @@ -0,0 +1,350 @@ +/* + * Copyright 2012, Red Hat, Inc. + * Copyright 2012, Soren Sandmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann + */ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <assert.h> +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +typedef double (* kernel_func_t) (double x); + +typedef struct +{ + pixman_kernel_t kernel; + kernel_func_t func; + double width; +} filter_info_t; + +static double +impulse_kernel (double x) +{ + return (x == 0.0)? 1.0 : 0.0; +} + +static double +box_kernel (double x) +{ + return 1; +} + +static double +linear_kernel (double x) +{ + return 1 - fabs (x); +} + +static double +gaussian_kernel (double x) +{ +#define SQRT2 (1.4142135623730950488016887242096980785696718753769480) +#define SIGMA (SQRT2 / 2.0) + + return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI)); +} + +static double +sinc (double x) +{ + if (x == 0.0) + return 1.0; + else + return sin (M_PI * x) / (M_PI * x); +} + +static double +lanczos (double x, int n) +{ + return sinc (x) * sinc (x * (1.0 / n)); +} + +static double +lanczos2_kernel (double x) +{ + return lanczos (x, 2); +} + +static double +lanczos3_kernel (double x) +{ + return lanczos (x, 3); +} + +static double +nice_kernel (double x) +{ + return lanczos3_kernel (x * 0.75); +} + +static double +general_cubic (double x, double B, double C) +{ + double ax = fabs(x); + + if (ax < 1) + { + return ((12 - 9 * B - 6 * C) * ax * ax * ax + + (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; + } + else if (ax >= 1 && ax < 2) + { + return ((-B - 6 * C) * ax * ax * ax + + (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * + ax + (8 * B + 24 * C)) / 6; + } + else + { + return 0; + } +} + +static double +cubic_kernel (double x) +{ + /* This is the Mitchell-Netravali filter. + * + * (0.0, 0.5) would give us the Catmull-Rom spline, + * but that one seems to be indistinguishable from Lanczos2. + */ + return general_cubic (x, 1/3.0, 1/3.0); +} + +static const filter_info_t filters[] = +{ + { PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 }, + { PIXMAN_KERNEL_BOX, box_kernel, 1.0 }, + { PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 }, + { PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 }, + { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA }, + { PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 }, + { PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 }, + { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 }, +}; + +/* This function scales @kernel2 by @scale, then + * aligns @x1 in @kernel1 with @x2 in @kernel2 and + * integrates the product of the kernels across @width. + * + * This function assumes that the intervals are within + * the kernels in question. E.g., the caller must not + * try to integrate a linear kernel outside of [-1:1]. + */ +static double +integral (pixman_kernel_t kernel1, double x1, + pixman_kernel_t kernel2, double scale, double x2, + double width) +{ + /* If the integration interval crosses zero, break it into + * two separate integrals. This ensures that filters such + * as LINEAR that are not differentiable at 0 will still + * integrate properly. 
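+ * For example, an integral of the LINEAR kernel over [-0.25, 0.75) is + * evaluated as the sum of integrals over [-0.25, 0) and [0, 0.75), so + * each sub-interval is smooth.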
+ */ + if (x1 < 0 && x1 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x1) + + integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); + } + else if (x2 < 0 && x2 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x2) + + integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2); + } + else if (kernel1 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel2].func (x2 * scale); + } + else if (kernel2 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel1].func (x1); + } + else + { + /* Integration via Simpson's rule */ +#define N_SEGMENTS 128 +#define SAMPLE(a1, a2) \ + (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) + + double s = 0.0; + double h = width / (double)N_SEGMENTS; + int i; + + s = SAMPLE (x1, x2); + + for (i = 1; i < N_SEGMENTS; i += 2) + { + double a1 = x1 + h * i; + double a2 = x2 + h * i; + + s += 2 * SAMPLE (a1, a2); + + if (i >= 2 && i < N_SEGMENTS - 1) + s += 4 * SAMPLE (a1, a2); + } + + s += SAMPLE (x1 + width, x2 + width); + + return h * s * (1.0 / 3.0); + } +} + +static pixman_fixed_t * +create_1d_filter (int *width, + pixman_kernel_t reconstruct, + pixman_kernel_t sample, + double scale, + int n_phases) +{ + pixman_fixed_t *params, *p; + double step; + double size; + int i; + + size = scale * filters[sample].width + filters[reconstruct].width; + *width = ceil (size); + + p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t)); + if (!params) + return NULL; + + step = 1.0 / n_phases; + + for (i = 0; i < n_phases; ++i) + { + double frac = step / 2.0 + i * step; + pixman_fixed_t new_total; + int x, x1, x2; + double total; + + /* Sample convolution of reconstruction and sampling + * filter. See rounding.txt regarding the rounding + * and sample positions. 
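+ * + * Each pass of the surrounding loop fills the taps for one subpixel + * phase: with step = 1 / n_phases, frac = step / 2 + i * step puts the + * sample position at the center of the i'th of n_phases equal slots.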
+ */ + + x1 = ceil (frac - *width / 2.0 - 0.5); + x2 = x1 + *width; + + total = 0; + for (x = x1; x < x2; ++x) + { + double pos = x + 0.5 - frac; + double rlow = - filters[reconstruct].width / 2.0; + double rhigh = rlow + filters[reconstruct].width; + double slow = pos - scale * filters[sample].width / 2.0; + double shigh = slow + scale * filters[sample].width; + double c = 0.0; + double ilow, ihigh; + + if (rhigh >= slow && rlow <= shigh) + { + ilow = MAX (slow, rlow); + ihigh = MIN (shigh, rhigh); + + c = integral (reconstruct, ilow, + sample, 1.0 / scale, ilow - pos, + ihigh - ilow); + } + + total += c; + *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5); + } + + /* Normalize */ + p -= *width; + total = 1 / total; + new_total = 0; + for (x = x1; x < x2; ++x) + { + pixman_fixed_t t = (*p) * total + 0.5; + + new_total += t; + *p++ = t; + } + + if (new_total != pixman_fixed_1) + *(p - *width / 2) += (pixman_fixed_1 - new_total); + } + + return params; +} + +/* Create the parameter list for a SEPARABLE_CONVOLUTION filter + * with the given kernels and scale parameters + */ +PIXMAN_EXPORT pixman_fixed_t * +pixman_filter_create_separable_convolution (int *n_values, + pixman_fixed_t scale_x, + pixman_fixed_t scale_y, + pixman_kernel_t reconstruct_x, + pixman_kernel_t reconstruct_y, + pixman_kernel_t sample_x, + pixman_kernel_t sample_y, + int subsample_bits_x, + int subsample_bits_y) +{ + double sx = fabs (pixman_fixed_to_double (scale_x)); + double sy = fabs (pixman_fixed_to_double (scale_y)); + pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL; + int subsample_x, subsample_y; + int width, height; + + subsample_x = (1 << subsample_bits_x); + subsample_y = (1 << subsample_bits_y); + + horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x); + vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y); + + if (!horz || !vert) + goto out; + + *n_values = 4 + width * subsample_x + height * subsample_y; + + params = malloc (*n_values * sizeof (pixman_fixed_t)); + if (!params) + goto out; + + params[0] = pixman_int_to_fixed (width); + params[1] = pixman_int_to_fixed (height); + params[2] = pixman_int_to_fixed (subsample_bits_x); + params[3] = pixman_int_to_fixed (subsample_bits_y); + + memcpy (params + 4, horz, + width * subsample_x * sizeof (pixman_fixed_t)); + memcpy (params + 4 + width * subsample_x, vert, + height * subsample_y * sizeof (pixman_fixed_t)); + +out: + free (horz); + free (vert); + + return params; +} diff --git a/programs/develop/libraries/pixman/pixman-general.c b/programs/develop/libraries/pixman/pixman-general.c index 8130f166ef..93a1b9acfa 100644 --- a/programs/develop/libraries/pixman/pixman-general.c +++ b/programs/develop/libraries/pixman/pixman-general.c @@ -36,44 +36,102 @@ #include #include #include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-private.h" + +static pixman_bool_t +general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if (image->type == LINEAR) + _pixman_linear_gradient_iter_init (image, iter); + else if (image->type == RADIAL) + _pixman_radial_gradient_iter_init (image, iter); + else if (image->type == CONICAL) + _pixman_conical_gradient_iter_init (image, iter); + else if (image->type == BITS) + _pixman_bits_image_src_iter_init (image, iter); + else if (image->type == SOLID) + _pixman_log_error (FUNC, "Solid image not handled by noop"); + else + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + + return TRUE; +} + 
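Stepping back to the new pixman_filter_create_separable_convolution entry point introduced above, a caller-side usage sketch may help. This is a hedged illustration, not code from the patch; it assumes the public pixman.h API, where pixman_image_set_filter copies the parameter array so the caller may free it afterwards:

#include <stdlib.h>
#include "pixman.h"

static pixman_bool_t
set_downscale_filter (pixman_image_t *src)
{
    int n_values;
    /* 16.16 fixed-point scale factors: 2.0 means one destination pixel
     * covers two source pixels along that axis */
    pixman_fixed_t *params = pixman_filter_create_separable_convolution (
        &n_values,
        pixman_double_to_fixed (2.0), pixman_double_to_fixed (2.0),
        PIXMAN_KERNEL_LINEAR,    /* reconstruct_x */
        PIXMAN_KERNEL_LINEAR,    /* reconstruct_y */
        PIXMAN_KERNEL_LANCZOS2,  /* sample_x */
        PIXMAN_KERNEL_LANCZOS2,  /* sample_y */
        4, 4);                   /* 2^4 = 16 subsample phases per axis */

    if (!params)
        return FALSE;

    pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
                             params, n_values);
    free (params);

    return TRUE;
}

The parameter layout matches what the function stores: params[0..3] hold the fixed-point width, height and the two subsample_bits values, followed by width * 2^subsample_bits_x horizontal taps and then the vertical ones.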
+static pixman_bool_t +general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + if (iter->image->type == BITS) + { + _pixman_bits_image_dest_iter_init (iter->image, iter); + + return TRUE; + } + else + { + _pixman_log_error (FUNC, "Trying to write to a non-writable image"); + + return FALSE; + } +} + +typedef struct op_info_t op_info_t; +struct op_info_t +{ + uint8_t src, dst; +}; + +#define ITER_IGNORE_BOTH \ + (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA) + +static const op_info_t op_flags[PIXMAN_N_OPERATORS] = +{ + /* Src Dst */ + { ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */ + { ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */ + { 0, ITER_LOCALIZED_ALPHA }, /* OVER */ + { ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */ + { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */ + { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */ + { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */ + { 0, 0 }, /* ATOP */ + { 0, 0 }, /* ATOP_REVERSE */ + { 0, 0 }, /* XOR */ + { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */ + { 0, 0 }, /* SATURATE */ +}; #define SCANLINE_BUFFER_LENGTH 8192 static void general_composite_rect (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8]; uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; - fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL; + pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; - store_scanline_t store; - source_image_class_t src_class, mask_class; pixman_bool_t component_alpha; - uint32_t *bits; - int32_t stride; - int narrow, Bpp; + iter_flags_t narrow, src_iter_flags; + int Bpp; int i; - narrow = - (src->common.flags & FAST_PATH_NARROW_FORMAT) && - (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) && - (dest->common.flags & FAST_PATH_NARROW_FORMAT); - Bpp = narrow ? 
4 : 8; + if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) + { + narrow = ITER_NARROW; + Bpp = 4; + } + else + { + narrow = 0; + Bpp = 16; + } if (width * Bpp > SCANLINE_BUFFER_LENGTH) { @@ -87,172 +145,60 @@ general_composite_rect (pixman_implementation_t *imp, mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; - src_class = _pixman_image_classify (src, - src_x, src_y, - width, height); - - mask_class = SOURCE_IMAGE_CLASS_UNKNOWN; - - if (mask) + if (!narrow) { - mask_class = _pixman_image_classify (mask, - src_x, src_y, - width, height); + /* To make sure there aren't any NANs in the buffers */ + memset (src_buffer, 0, width * Bpp); + memset (mask_buffer, 0, width * Bpp); + memset (dest_buffer, 0, width * Bpp); } + + /* src iter */ + src_iter_flags = narrow | op_flags[op].src; - if (op == PIXMAN_OP_CLEAR) - fetch_src = NULL; - else if (narrow) - fetch_src = _pixman_image_get_scanline_32; - else - fetch_src = _pixman_image_get_scanline_64; + _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, info->src_flags); - if (!mask || op == PIXMAN_OP_CLEAR) - fetch_mask = NULL; - else if (narrow) - fetch_mask = _pixman_image_get_scanline_32; - else - fetch_mask = _pixman_image_get_scanline_64; - - if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC) - fetch_dest = NULL; - else if (narrow) - fetch_dest = _pixman_image_get_scanline_32; - else - fetch_dest = _pixman_image_get_scanline_64; - - if (narrow) - store = _pixman_image_store_scanline_32; - else - store = _pixman_image_store_scanline_64; - - /* Skip the store step and composite directly into the - * destination if the output format of the compose func matches - * the destination format. - * - * If the destination format is a8r8g8b8 then we can always do - * this. If it is x8r8g8b8, then we can only do it if the - * operator doesn't make use of destination alpha. 
- */ - if ((dest->bits.format == PIXMAN_a8r8g8b8) || - (dest->bits.format == PIXMAN_x8r8g8b8 && - (op == PIXMAN_OP_OVER || - op == PIXMAN_OP_ADD || - op == PIXMAN_OP_SRC || - op == PIXMAN_OP_CLEAR || - op == PIXMAN_OP_IN_REVERSE || - op == PIXMAN_OP_OUT_REVERSE || - op == PIXMAN_OP_DST))) + /* mask iter */ + if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == + (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) { - if (narrow && - !dest->common.alpha_map && - !dest->bits.write_func) - { - store = NULL; - } - } - - if (!store) - { - bits = dest->bits.bits; - stride = dest->bits.rowstride; - } - else - { - bits = NULL; - stride = 0; + /* If it doesn't matter what the source is, then it doesn't matter + * what the mask is + */ + mask_image = NULL; } component_alpha = - fetch_src && - fetch_mask && - mask && - mask->common.type == BITS && - mask->common.component_alpha && - PIXMAN_FORMAT_RGB (mask->bits.format); + mask_image && + mask_image->common.type == BITS && + mask_image->common.component_alpha && + PIXMAN_FORMAT_RGB (mask_image->bits.format); - if (narrow) - { - if (component_alpha) - compose = _pixman_implementation_combine_32_ca; - else - compose = _pixman_implementation_combine_32; - } - else - { - if (component_alpha) - compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca; - else - compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64; - } + _pixman_implementation_src_iter_init ( + imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, + mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags); - if (!compose) - return; + /* dest iter */ + _pixman_implementation_dest_iter_init ( + imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, + dest_buffer, narrow | op_flags[op].dst, info->dest_flags); - if (!fetch_mask) - mask_buffer = NULL; + compose = _pixman_implementation_lookup_combiner ( + imp->toplevel, op, component_alpha, narrow); for (i = 0; i < height; ++i) { - /* fill first half of scanline with source */ - if (fetch_src) - { - if (fetch_mask) - { - /* fetch mask before source so that fetching of - source can be optimized */ - fetch_mask (mask, mask_x, mask_y + i, - width, (void *)mask_buffer, 0); + uint32_t *s, *m, *d; - if (mask_class == SOURCE_IMAGE_CLASS_HORIZONTAL) - fetch_mask = NULL; - } + m = mask_iter.get_scanline (&mask_iter, NULL); + s = src_iter.get_scanline (&src_iter, m); + d = dest_iter.get_scanline (&dest_iter, NULL); - if (src_class == SOURCE_IMAGE_CLASS_HORIZONTAL) - { - fetch_src (src, src_x, src_y + i, - width, (void *)src_buffer, 0); - fetch_src = NULL; - } - else - { - fetch_src (src, src_x, src_y + i, - width, (void *)src_buffer, (void *)mask_buffer); - } - } - else if (fetch_mask) - { - fetch_mask (mask, mask_x, mask_y + i, - width, (void *)mask_buffer, 0); - } + compose (imp->toplevel, op, d, s, m, width); - if (store) - { - /* fill dest into second half of scanline */ - if (fetch_dest) - { - fetch_dest (dest, dest_x, dest_y + i, - width, (void *)dest_buffer, 0); - } - - /* blend */ - compose (imp->toplevel, op, - (void *)dest_buffer, - (void *)src_buffer, - (void *)mask_buffer, - width); - - /* write back */ - store (&(dest->bits), dest_x, dest_y + i, width, - (void *)dest_buffer); - } - else - { - /* blend */ - compose (imp->toplevel, op, - bits + (dest_y + i) * stride + dest_x, - (void *)src_buffer, (void *)mask_buffer, width); - } + dest_iter.write_back (&dest_iter); } if (scanline_buffer != (uint8_t *) stack_scanline_buffer) @@ -265,50 +211,16 @@ static const 
pixman_fast_path_t general_fast_path[] =
 {
     { PIXMAN_OP_NONE }
 };
 
-static pixman_bool_t
-general_blt (pixman_implementation_t *imp,
-             uint32_t *               src_bits,
-             uint32_t *               dst_bits,
-             int                      src_stride,
-             int                      dst_stride,
-             int                      src_bpp,
-             int                      dst_bpp,
-             int                      src_x,
-             int                      src_y,
-             int                      dst_x,
-             int                      dst_y,
-             int                      width,
-             int                      height)
-{
-    /* We can't blit unless we have sse2 or mmx */
-
-    return FALSE;
-}
-
-static pixman_bool_t
-general_fill (pixman_implementation_t *imp,
-              uint32_t *               bits,
-              int                      stride,
-              int                      bpp,
-              int                      x,
-              int                      y,
-              int                      width,
-              int                      height,
-              uint32_t                 xor)
-{
-    return FALSE;
-}
-
 pixman_implementation_t *
 _pixman_implementation_create_general (void)
 {
     pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
 
     _pixman_setup_combiner_functions_32 (imp);
-    _pixman_setup_combiner_functions_64 (imp);
+    _pixman_setup_combiner_functions_float (imp);
 
-    imp->blt = general_blt;
-    imp->fill = general_fill;
+    imp->src_iter_init = general_src_iter_init;
+    imp->dest_iter_init = general_dest_iter_init;
 
     return imp;
 }
diff --git a/programs/develop/libraries/pixman/pixman-glyph.c b/programs/develop/libraries/pixman/pixman-glyph.c
new file mode 100644
index 0000000000..5a271b64b8
--- /dev/null
+++ b/programs/develop/libraries/pixman/pixman-glyph.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright 2010, 2012, Soren Sandmann
+ * Copyright 2010, 2011, 2012, Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "pixman-private.h"
+
+#include <stdlib.h>
+
+typedef struct glyph_metrics_t glyph_metrics_t;
+typedef struct glyph_t glyph_t;
+
+#define TOMBSTONE ((glyph_t *)0x1)
+
+/* XXX: These numbers are arbitrary---we've never done any measurements.
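+ *
+ * (Editor's note: HASH_SIZE below is twice N_GLYPHS_HIGH_WATER, so once a
+ * thaw has purged the cache back below the high-water mark the
+ * open-addressed table is no more than about half full, which keeps probe
+ * chains short.)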
+ */ +#define N_GLYPHS_HIGH_WATER (16384) +#define N_GLYPHS_LOW_WATER (8192) +#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER) +#define HASH_MASK (HASH_SIZE - 1) + +struct glyph_t +{ + void * font_key; + void * glyph_key; + int origin_x; + int origin_y; + pixman_image_t * image; + pixman_link_t mru_link; +}; + +struct pixman_glyph_cache_t +{ + int n_glyphs; + int n_tombstones; + int freeze_count; + pixman_list_t mru; + glyph_t * glyphs[HASH_SIZE]; +}; + +static void +free_glyph (glyph_t *glyph) +{ + pixman_list_unlink (&glyph->mru_link); + pixman_image_unref (glyph->image); + free (glyph); +} + +static unsigned int +hash (const void *font_key, const void *glyph_key) +{ + size_t key = (size_t)font_key + (size_t)glyph_key; + + /* This hash function is based on one found on Thomas Wang's + * web page at + * + * http://www.concentric.net/~Ttwang/tech/inthash.htm + * + */ + key = (key << 15) - key - 1; + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key + (key << 3) + (key << 11); + key = key ^ (key >> 16); + + return key; +} + +static glyph_t * +lookup_glyph (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + unsigned idx; + glyph_t *g; + + idx = hash (font_key, glyph_key); + while ((g = cache->glyphs[idx++ & HASH_MASK])) + { + if (g != TOMBSTONE && + g->font_key == font_key && + g->glyph_key == glyph_key) + { + return g; + } + } + + return NULL; +} + +static void +insert_glyph (pixman_glyph_cache_t *cache, + glyph_t *glyph) +{ + unsigned idx; + glyph_t **loc; + + idx = hash (glyph->font_key, glyph->glyph_key); + + /* Note: we assume that there is room in the table. If there isn't, + * this will be an infinite loop. + */ + do + { + loc = &cache->glyphs[idx++ & HASH_MASK]; + } while (*loc && *loc != TOMBSTONE); + + if (*loc == TOMBSTONE) + cache->n_tombstones--; + cache->n_glyphs++; + + *loc = glyph; +} + +static void +remove_glyph (pixman_glyph_cache_t *cache, + glyph_t *glyph) +{ + unsigned idx; + + idx = hash (glyph->font_key, glyph->glyph_key); + while (cache->glyphs[idx & HASH_MASK] != glyph) + idx++; + + cache->glyphs[idx & HASH_MASK] = TOMBSTONE; + cache->n_tombstones++; + cache->n_glyphs--; + + /* Eliminate tombstones if possible */ + if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL) + { + while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE) + { + cache->glyphs[idx & HASH_MASK] = NULL; + cache->n_tombstones--; + idx--; + } + } +} + +static void +clear_table (pixman_glyph_cache_t *cache) +{ + int i; + + for (i = 0; i < HASH_SIZE; ++i) + { + glyph_t *glyph = cache->glyphs[i]; + + if (glyph && glyph != TOMBSTONE) + free_glyph (glyph); + + cache->glyphs[i] = NULL; + } + + cache->n_glyphs = 0; + cache->n_tombstones = 0; +} + +PIXMAN_EXPORT pixman_glyph_cache_t * +pixman_glyph_cache_create (void) +{ + pixman_glyph_cache_t *cache; + + if (!(cache = malloc (sizeof *cache))) + return NULL; + + memset (cache->glyphs, 0, sizeof (cache->glyphs)); + cache->n_glyphs = 0; + cache->n_tombstones = 0; + cache->freeze_count = 0; + + pixman_list_init (&cache->mru); + + return cache; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache) +{ + return_if_fail (cache->freeze_count == 0); + + clear_table (cache); + + free (cache); +} + +PIXMAN_EXPORT void +pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache) +{ + cache->freeze_count++; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache) +{ + if (--cache->freeze_count == 0 && + cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER) + { + if 
(cache->n_tombstones > N_GLYPHS_HIGH_WATER) + { + /* More than half the entries are + * tombstones. Just dump the whole table. + */ + clear_table (cache); + } + + while (cache->n_glyphs > N_GLYPHS_LOW_WATER) + { + glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail); + + remove_glyph (cache, glyph); + free_glyph (glyph); + } + } +} + +PIXMAN_EXPORT const void * +pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + return lookup_glyph (cache, font_key, glyph_key); +} + +PIXMAN_EXPORT const void * +pixman_glyph_cache_insert (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key, + int origin_x, + int origin_y, + pixman_image_t *image) +{ + glyph_t *glyph; + int32_t width, height; + + return_val_if_fail (cache->freeze_count > 0, NULL); + return_val_if_fail (image->type == BITS, NULL); + + width = image->bits.width; + height = image->bits.height; + + if (cache->n_glyphs >= HASH_SIZE) + return NULL; + + if (!(glyph = malloc (sizeof *glyph))) + return NULL; + + glyph->font_key = font_key; + glyph->glyph_key = glyph_key; + glyph->origin_x = origin_x; + glyph->origin_y = origin_y; + + if (!(glyph->image = pixman_image_create_bits ( + image->bits.format, width, height, NULL, -1))) + { + free (glyph); + return NULL; + } + + pixman_image_composite32 (PIXMAN_OP_SRC, + image, NULL, glyph->image, 0, 0, 0, 0, 0, 0, + width, height); + + if (PIXMAN_FORMAT_A (glyph->image->bits.format) != 0 && + PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0) + { + pixman_image_set_component_alpha (glyph->image, TRUE); + } + + pixman_list_prepend (&cache->mru, &glyph->mru_link); + + _pixman_image_validate (glyph->image); + insert_glyph (cache, glyph); + + return glyph; +} + +PIXMAN_EXPORT void +pixman_glyph_cache_remove (pixman_glyph_cache_t *cache, + void *font_key, + void *glyph_key) +{ + glyph_t *glyph; + + if ((glyph = lookup_glyph (cache, font_key, glyph_key))) + { + remove_glyph (cache, glyph); + + free_glyph (glyph); + } +} + +PIXMAN_EXPORT void +pixman_glyph_get_extents (pixman_glyph_cache_t *cache, + int n_glyphs, + pixman_glyph_t *glyphs, + pixman_box32_t *extents) +{ + int i; + + extents->x1 = extents->y1 = INT32_MAX; + extents->x2 = extents->y2 = INT32_MIN; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + int x1, y1, x2, y2; + + x1 = glyphs[i].x - glyph->origin_x; + y1 = glyphs[i].y - glyph->origin_y; + x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width; + y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height; + + if (x1 < extents->x1) + extents->x1 = x1; + if (y1 < extents->y1) + extents->y1 = y1; + if (x2 > extents->x2) + extents->x2 = x2; + if (y2 > extents->y2) + extents->y2 = y2; + } +} + +/* This function returns a format that is suitable for use as a mask for the + * set of glyphs in question. 
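+ *
+ * For example (editor's note): a mix of a1 and a8 glyphs yields PIXMAN_a8,
+ * while a single ARGB glyph in the set forces PIXMAN_a8r8g8b8.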
+ */ +PIXMAN_EXPORT pixman_format_code_t +pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_format_code_t format = PIXMAN_a1; + int i; + + for (i = 0; i < n_glyphs; ++i) + { + const glyph_t *glyph = glyphs[i].glyph; + pixman_format_code_t glyph_format = glyph->image->bits.format; + + if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A) + { + if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format)) + format = glyph_format; + } + else + { + return PIXMAN_a8r8g8b8; + } + } + + return format; +} + +static pixman_bool_t +box32_intersect (pixman_box32_t *dest, + const pixman_box32_t *box1, + const pixman_box32_t *box2) +{ + dest->x1 = MAX (box1->x1, box2->x1); + dest->y1 = MAX (box1->y1, box2->y1); + dest->x2 = MIN (box1->x2, box2->x2); + dest->y2 = MIN (box1->y2, box2->y2); + + return dest->x2 > dest->x1 && dest->y2 > dest->y1; +} + +PIXMAN_EXPORT void +pixman_composite_glyphs_no_mask (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + int32_t src_x, + int32_t src_y, + int32_t dest_x, + int32_t dest_y, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_region32_t region; + pixman_format_code_t glyph_format = PIXMAN_null; + uint32_t glyph_flags = 0; + pixman_format_code_t dest_format; + uint32_t dest_flags; + pixman_composite_func_t func = NULL; + pixman_implementation_t *implementation = NULL; + pixman_composite_info_t info; + int i; + + _pixman_image_validate (src); + _pixman_image_validate (dest); + + dest_format = dest->common.extended_format_code; + dest_flags = dest->common.flags; + + pixman_region32_init (®ion); + if (!_pixman_compute_composite_region32 ( + ®ion, + src, NULL, dest, + src_x - dest_x, src_y - dest_y, 0, 0, 0, 0, + dest->bits.width, dest->bits.height)) + { + goto out; + } + + info.op = op; + info.src_image = src; + info.dest_image = dest; + info.src_flags = src->common.flags; + info.dest_flags = dest->common.flags; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + pixman_image_t *glyph_img = glyph->image; + pixman_box32_t glyph_box; + pixman_box32_t *pbox; + uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + pixman_box32_t composite_box; + int n; + + glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x; + glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y; + glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; + glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; + + pbox = pixman_region32_rectangles (®ion, &n); + + info.mask_image = glyph_img; + + while (n--) + { + if (box32_intersect (&composite_box, pbox, &glyph_box)) + { + if (glyph_img->common.extended_format_code != glyph_format || + glyph_img->common.flags != glyph_flags) + { + glyph_format = glyph_img->common.extended_format_code; + glyph_flags = glyph_img->common.flags; + + _pixman_implementation_lookup_composite ( + get_implementation(), op, + src->common.extended_format_code, src->common.flags, + glyph_format, glyph_flags | extra, + dest_format, dest_flags, + &implementation, &func); + } + + info.src_x = src_x + composite_box.x1 - dest_x; + info.src_y = src_y + composite_box.y1 - dest_y; + info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x); + info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y); + info.dest_x = composite_box.x1; + info.dest_y = composite_box.y1; + info.width = composite_box.x2 - composite_box.x1; + info.height = composite_box.y2 - composite_box.y1; + + info.mask_flags = 
glyph_flags; + + func (implementation, &info); + } + + pbox++; + } + pixman_list_move_to_front (&cache->mru, &glyph->mru_link); + } + +out: + pixman_region32_fini (®ion); +} + +static void +add_glyphs (pixman_glyph_cache_t *cache, + pixman_image_t *dest, + int off_x, int off_y, + int n_glyphs, const pixman_glyph_t *glyphs) +{ + pixman_format_code_t glyph_format = PIXMAN_null; + uint32_t glyph_flags = 0; + pixman_composite_func_t func = NULL; + pixman_implementation_t *implementation = NULL; + pixman_format_code_t dest_format; + uint32_t dest_flags; + pixman_box32_t dest_box; + pixman_composite_info_t info; + pixman_image_t *white_img = NULL; + pixman_bool_t white_src = FALSE; + int i; + + _pixman_image_validate (dest); + + dest_format = dest->common.extended_format_code; + dest_flags = dest->common.flags; + + info.op = PIXMAN_OP_ADD; + info.dest_image = dest; + info.src_x = 0; + info.src_y = 0; + info.dest_flags = dest_flags; + + dest_box.x1 = 0; + dest_box.y1 = 0; + dest_box.x2 = dest->bits.width; + dest_box.y2 = dest->bits.height; + + for (i = 0; i < n_glyphs; ++i) + { + glyph_t *glyph = (glyph_t *)glyphs[i].glyph; + pixman_image_t *glyph_img = glyph->image; + pixman_box32_t glyph_box; + pixman_box32_t composite_box; + + if (glyph_img->common.extended_format_code != glyph_format || + glyph_img->common.flags != glyph_flags) + { + pixman_format_code_t src_format, mask_format; + + glyph_format = glyph_img->common.extended_format_code; + glyph_flags = glyph_img->common.flags; + + if (glyph_format == dest->bits.format) + { + src_format = glyph_format; + mask_format = PIXMAN_null; + info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_image = NULL; + white_src = FALSE; + } + else + { + if (!white_img) + { + static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; + + if (!(white_img = pixman_image_create_solid_fill (&white))) + goto out; + + _pixman_image_validate (white_img); + } + + src_format = PIXMAN_solid; + mask_format = glyph_format; + info.src_flags = white_img->common.flags; + info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + info.src_image = white_img; + white_src = TRUE; + } + + _pixman_implementation_lookup_composite ( + get_implementation(), PIXMAN_OP_ADD, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, dest_flags, + &implementation, &func); + } + + glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; + glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y; + glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; + glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; + + if (box32_intersect (&composite_box, &glyph_box, &dest_box)) + { + int src_x = composite_box.x1 - glyph_box.x1; + int src_y = composite_box.y1 - glyph_box.y1; + + if (white_src) + info.mask_image = glyph_img; + else + info.src_image = glyph_img; + + info.mask_x = info.src_x = src_x; + info.mask_y = info.src_y = src_y; + info.dest_x = composite_box.x1; + info.dest_y = composite_box.y1; + info.width = composite_box.x2 - composite_box.x1; + info.height = composite_box.y2 - composite_box.y1; + + func (implementation, &info); + + pixman_list_move_to_front (&cache->mru, &glyph->mru_link); + } + } + +out: + if (white_img) + pixman_image_unref (white_img); +} + +/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an + * infinitely big mask image at the position such that the glyph origin point + * is positioned at the (glyphs[i].x, glyphs[i].y) point. 
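+ * (Editor's note: PIXMAN_OP_ADD is used when building the mask so that
+ * overlapping glyphs accumulate coverage instead of overwriting each other.)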
+ * + * Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source + * image are both aligned with (dest_x, dest_y) in the destination image. Then + * these three images are composited within the + * + * (dest_x, dest_y, dst_x + width, dst_y + height) + * + * rectangle. + * + * TODO: + * - Trim the mask to the destination clip/image? + * - Trim composite region based on sources, when the op ignores 0s. + */ +PIXMAN_EXPORT void +pixman_composite_glyphs (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dest, + pixman_format_code_t mask_format, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height, + pixman_glyph_cache_t *cache, + int n_glyphs, + const pixman_glyph_t *glyphs) +{ + pixman_image_t *mask; + + if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1))) + return; + + if (PIXMAN_FORMAT_A (mask_format) != 0 && + PIXMAN_FORMAT_RGB (mask_format) != 0) + { + pixman_image_set_component_alpha (mask, TRUE); + } + + add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs); + + pixman_image_composite32 (op, src, mask, dest, + src_x, src_y, + 0, 0, + dest_x, dest_y, + width, height); + + pixman_image_unref (mask); +} diff --git a/programs/develop/libraries/pixman/pixman-gradient-walker.c b/programs/develop/libraries/pixman/pixman-gradient-walker.c index dd666b4120..5944a559ad 100644 --- a/programs/develop/libraries/pixman/pixman-gradient-walker.c +++ b/programs/develop/libraries/pixman/pixman-gradient-walker.c @@ -31,123 +31,71 @@ void _pixman_gradient_walker_init (pixman_gradient_walker_t *walker, gradient_t * gradient, - unsigned int spread) + pixman_repeat_t repeat) { walker->num_stops = gradient->n_stops; walker->stops = gradient->stops; walker->left_x = 0; walker->right_x = 0x10000; - walker->stepper = 0; - walker->left_ag = 0; - walker->left_rb = 0; - walker->right_ag = 0; - walker->right_rb = 0; - walker->spread = spread; + walker->a_s = 0.0f; + walker->a_b = 0.0f; + walker->r_s = 0.0f; + walker->r_b = 0.0f; + walker->g_s = 0.0f; + walker->g_b = 0.0f; + walker->b_s = 0.0f; + walker->b_b = 0.0f; + walker->repeat = repeat; walker->need_reset = TRUE; } -void -_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t pos) +static void +gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t pos) { int32_t x, left_x, right_x; - pixman_color_t *left_c, *right_c; + pixman_color_t *left_c, *right_c; int n, count = walker->num_stops; - pixman_gradient_stop_t * stops = walker->stops; + pixman_gradient_stop_t *stops = walker->stops; + float la, lr, lg, lb; + float ra, rr, rg, rb; + float lx, rx; - static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; - - switch (walker->spread) + if (walker->repeat == PIXMAN_REPEAT_NORMAL) { - case PIXMAN_REPEAT_NORMAL: - x = (int32_t)pos & 0xFFFF; - for (n = 0; n < count; n++) - if (x < stops[n].x) - break; - if (n == 0) - { - left_x = stops[count - 1].x - 0x10000; - left_c = &stops[count - 1].color; - } - else - { - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } - - if (n == count) - { - right_x = stops[0].x + 0x10000; - right_c = &stops[0].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } - left_x += (pos - x); - right_x += (pos - x); - break; - - case PIXMAN_REPEAT_PAD: - for (n = 0; n < count; n++) - if (pos < stops[n].x) - break; - - if (n == 0) - { - left_x = INT32_MIN; - left_c = &stops[0].color; - } - else - { - 
left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } - - if (n == count) - { - right_x = INT32_MAX; - right_c = &stops[n - 1].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } - break; - - case PIXMAN_REPEAT_REFLECT: - x = (int32_t)pos & 0xFFFF; + x = (int32_t)pos & 0xffff; + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { + x = (int32_t)pos & 0xffff; if ((int32_t)pos & 0x10000) x = 0x10000 - x; - for (n = 0; n < count; n++) - if (x < stops[n].x) - break; - - if (n == 0) - { - left_x = -stops[0].x; - left_c = &stops[0].color; - } - else - { - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } - - if (n == count) - { - right_x = 0x20000 - stops[n - 1].x; - right_c = &stops[n - 1].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } + } + else + { + x = pos; + } + + for (n = 0; n < count; n++) + { + if (x < stops[n].x) + break; + } + + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + + right_x = stops[n].x; + right_c = &stops[n].color; + if (walker->repeat == PIXMAN_REPEAT_NORMAL) + { + left_x += (pos - x); + right_x += (pos - x); + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { if ((int32_t)pos & 0x10000) { pixman_color_t *tmp_c; @@ -165,90 +113,90 @@ _pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, } left_x += (pos - x); right_x += (pos - x); - break; - - default: /* REPEAT_NONE */ - for (n = 0; n < count; n++) - if (pos < stops[n].x) - break; - + } + else if (walker->repeat == PIXMAN_REPEAT_NONE) + { if (n == 0) - { - left_x = INT32_MIN; - right_x = stops[0].x; - left_c = right_c = (pixman_color_t*) &transparent_black; - } + right_c = left_c; else if (n == count) - { - left_x = stops[n - 1].x; - right_x = INT32_MAX; - left_c = right_c = (pixman_color_t*) &transparent_black; - } - else - { - left_x = stops[n - 1].x; - right_x = stops[n].x; - left_c = &stops[n - 1].color; - right_c = &stops[n].color; - } + left_c = right_c; } - walker->left_x = left_x; - walker->right_x = right_x; - walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8); - walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8); - walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8); - walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8); + /* The alpha channel is scaled to be in the [0, 255] interval, + * and the red/green/blue channels are scaled to be in [0, 1]. + * This ensures that after premultiplication all channels will + * be in the [0, 255] interval. 
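+ *
+ * (Editor's worked example: a stop with red = alpha = 0xffff gives
+ * la = lr = 65535 / 257 = 255.0; the extra 1.0f/255.0f factor folded into
+ * the r_s/r_b coefficients below brings red back to 1.0, so the
+ * premultiplied value computed in _pixman_gradient_walker_pixel() is
+ * a * r = 255, i.e. a 0xffff0000 pixel.)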
+ */ + la = (left_c->alpha * (1.0f/257.0f)); + lr = (left_c->red * (1.0f/257.0f)); + lg = (left_c->green * (1.0f/257.0f)); + lb = (left_c->blue * (1.0f/257.0f)); - if (walker->left_x == walker->right_x || - ( walker->left_ag == walker->right_ag && - walker->left_rb == walker->right_rb ) ) + ra = (right_c->alpha * (1.0f/257.0f)); + rr = (right_c->red * (1.0f/257.0f)); + rg = (right_c->green * (1.0f/257.0f)); + rb = (right_c->blue * (1.0f/257.0f)); + + lx = left_x * (1.0f/65536.0f); + rx = right_x * (1.0f/65536.0f); + + if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX) { - walker->stepper = 0; + walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f; + walker->a_b = (la + ra) / 2.0f; + walker->r_b = (lr + rr) / 510.0f; + walker->g_b = (lg + rg) / 510.0f; + walker->b_b = (lb + rb) / 510.0f; } else { - int32_t width = right_x - left_x; - walker->stepper = ((1 << 24) + width / 2) / width; + float w_rec = 1.0f / (rx - lx); + + walker->a_b = (la * rx - ra * lx) * w_rec; + walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f); + walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f); + walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f); + + walker->a_s = (ra - la) * w_rec; + walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f); + walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f); + walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f); } + + walker->left_x = left_x; + walker->right_x = right_x; walker->need_reset = FALSE; } -#define PIXMAN_GRADIENT_WALKER_NEED_RESET(w, x) \ - ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x) - - -/* the following assumes that PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */ uint32_t _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t x) + pixman_fixed_48_16_t x) { - int dist, idist; - uint32_t t1, t2, a, color; + float a, r, g, b; + uint8_t a8, r8, g8, b8; + uint32_t v; + float y; - if (PIXMAN_GRADIENT_WALKER_NEED_RESET (walker, x)) - _pixman_gradient_walker_reset (walker, x); + if (walker->need_reset || x < walker->left_x || x >= walker->right_x) + gradient_walker_reset (walker, x); - dist = ((int)(x - walker->left_x) * walker->stepper) >> 16; - idist = 256 - dist; + y = x * (1.0f / 65536.0f); - /* combined INTERPOLATE and premultiply */ - t1 = walker->left_rb * idist + walker->right_rb * dist; - t1 = (t1 >> 8) & 0xff00ff; + a = walker->a_s * y + walker->a_b; + r = a * (walker->r_s * y + walker->r_b); + g = a * (walker->g_s * y + walker->g_b); + b = a * (walker->b_s * y + walker->b_b); - t2 = walker->left_ag * idist + walker->right_ag * dist; - t2 &= 0xff00ff00; + a8 = a + 0.5f; + r8 = r + 0.5f; + g8 = g + 0.5f; + b8 = b + 0.5f; - color = t2 & 0xff000000; - a = t2 >> 24; + v = ((a8 << 24) & 0xff000000) | + ((r8 << 16) & 0x00ff0000) | + ((g8 << 8) & 0x0000ff00) | + ((b8 >> 0) & 0x000000ff); - t1 = t1 * a + 0x800080; - t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8; - - t2 = (t2 >> 8) * a + 0x800080; - t2 = (t2 + ((t2 >> 8) & 0xff00ff)); - - return (color | (t1 & 0xff00ff) | (t2 & 0xff00)); + return v; } - diff --git a/programs/develop/libraries/pixman/pixman-image.c b/programs/develop/libraries/pixman/pixman-image.c index c646471930..65041b43b7 100644 --- a/programs/develop/libraries/pixman/pixman-image.c +++ b/programs/develop/libraries/pixman/pixman-image.c @@ -30,7 +30,50 @@ #include #include "pixman-private.h" -#include "pixman-combine32.h" + +static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; + +static void +gradient_property_changed (pixman_image_t *image) +{ + 
gradient_t *gradient = &image->gradient; + int n = gradient->n_stops; + pixman_gradient_stop_t *stops = gradient->stops; + pixman_gradient_stop_t *begin = &(gradient->stops[-1]); + pixman_gradient_stop_t *end = &(gradient->stops[n]); + + switch (gradient->common.repeat) + { + default: + case PIXMAN_REPEAT_NONE: + begin->x = INT32_MIN; + begin->color = transparent_black; + end->x = INT32_MAX; + end->color = transparent_black; + break; + + case PIXMAN_REPEAT_NORMAL: + begin->x = stops[n - 1].x - pixman_fixed_1; + begin->color = stops[n - 1].color; + end->x = stops[0].x + pixman_fixed_1; + end->color = stops[0].color; + break; + + case PIXMAN_REPEAT_REFLECT: + begin->x = - stops[0].x; + begin->color = stops[0].color; + end->x = pixman_int_to_fixed (2) - stops[n - 1].x; + end->color = stops[n - 1].color; + break; + + case PIXMAN_REPEAT_PAD: + begin->x = INT32_MIN; + begin->color = stops[0].color; + end->x = INT32_MAX; + end->color = stops[n - 1].color; + break; + } +} pixman_bool_t _pixman_init_gradient (gradient_t * gradient, @@ -39,54 +82,100 @@ _pixman_init_gradient (gradient_t * gradient, { return_val_if_fail (n_stops > 0, FALSE); - gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t)); + /* We allocate two extra stops, one before the beginning of the stop list, + * and one after the end. These stops are initialized to whatever color + * would be used for positions outside the range of the stop list. + * + * This saves a bit of computation in the gradient walker. + * + * The pointer we store in the gradient_t struct still points to the + * first user-supplied struct, so when freeing, we will have to + * subtract one. + */ + gradient->stops = + pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t)); if (!gradient->stops) return FALSE; + gradient->stops += 1; memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t)); - gradient->n_stops = n_stops; - gradient->stop_range = 0xffff; + gradient->common.property_changed = gradient_property_changed; return TRUE; } -/* - * By default, just evaluate the image at 32bpp and expand. Individual image - * types can plug in a better scanline getter if they want to. For example - * we could produce smoother gradients by evaluating them at higher color - * depth, but that's a project for the future. - */ void -_pixman_image_get_scanline_generic_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) +_pixman_image_init (pixman_image_t *image) { - uint32_t *mask8 = NULL; + image_common_t *common = &image->common; - /* Contract the mask image, if one exists, so that the 32-bit fetch - * function can use it. 
- */ - if (mask) + pixman_region32_init (&common->clip_region); + + common->alpha_count = 0; + common->have_clip_region = FALSE; + common->clip_sources = FALSE; + common->transform = NULL; + common->repeat = PIXMAN_REPEAT_NONE; + common->filter = PIXMAN_FILTER_NEAREST; + common->filter_params = NULL; + common->n_filter_params = 0; + common->alpha_map = NULL; + common->component_alpha = FALSE; + common->ref_count = 1; + common->property_changed = NULL; + common->client_clip = FALSE; + common->destroy_func = NULL; + common->destroy_data = NULL; + common->dirty = TRUE; +} + +pixman_bool_t +_pixman_image_fini (pixman_image_t *image) +{ + image_common_t *common = (image_common_t *)image; + + common->ref_count--; + + if (common->ref_count == 0) { - mask8 = pixman_malloc_ab (width, sizeof(uint32_t)); - if (!mask8) - return; + if (image->common.destroy_func) + image->common.destroy_func (image, image->common.destroy_data); - pixman_contract (mask8, (uint64_t *)mask, width); + pixman_region32_fini (&common->clip_region); + + free (common->transform); + free (common->filter_params); + + if (common->alpha_map) + pixman_image_unref ((pixman_image_t *)common->alpha_map); + + if (image->type == LINEAR || + image->type == RADIAL || + image->type == CONICAL) + { + if (image->gradient.stops) + { + /* See _pixman_init_gradient() for an explanation of the - 1 */ + free (image->gradient.stops - 1); + } + + /* This will trigger if someone adds a property_changed + * method to the linear/radial/conical gradient overwriting + * the general one. + */ + assert ( + image->common.property_changed == gradient_property_changed); + } + + if (image->type == BITS && image->bits.free_me) + free (image->bits.free_me); + + return TRUE; } - /* Fetch the source image into the first half of buffer. */ - _pixman_image_get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8); - - /* Expand from 32bpp to 64bpp in place. */ - pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width); - - free (mask8); + return FALSE; } pixman_image_t * @@ -95,70 +184,11 @@ _pixman_image_allocate (void) pixman_image_t *image = malloc (sizeof (pixman_image_t)); if (image) - { - image_common_t *common = &image->common; - - pixman_region32_init (&common->clip_region); - - common->alpha_count = 0; - common->have_clip_region = FALSE; - common->clip_sources = FALSE; - common->transform = NULL; - common->repeat = PIXMAN_REPEAT_NONE; - common->filter = PIXMAN_FILTER_NEAREST; - common->filter_params = NULL; - common->n_filter_params = 0; - common->alpha_map = NULL; - common->component_alpha = FALSE; - common->ref_count = 1; - common->classify = NULL; - common->client_clip = FALSE; - common->destroy_func = NULL; - common->destroy_data = NULL; - common->dirty = TRUE; - } + _pixman_image_init (image); return image; } -source_image_class_t -_pixman_image_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) -{ - if (image->common.classify) - return image->common.classify (image, x, y, width, height); - else - return SOURCE_IMAGE_CLASS_UNKNOWN; -} - -void -_pixman_image_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - image->common.get_scanline_32 (image, x, y, width, buffer, mask); -} - -/* Even thought the type of buffer is uint32_t *, the function actually expects - * a uint64_t *buffer. 
- */ -void -_pixman_image_get_scanline_64 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *unused) -{ - image->common.get_scanline_64 (image, x, y, width, buffer, unused); -} - static void image_property_changed (pixman_image_t *image) { @@ -178,39 +208,9 @@ pixman_image_ref (pixman_image_t *image) PIXMAN_EXPORT pixman_bool_t pixman_image_unref (pixman_image_t *image) { - image_common_t *common = (image_common_t *)image; - - common->ref_count--; - - if (common->ref_count == 0) + if (_pixman_image_fini (image)) { - if (image->common.destroy_func) - image->common.destroy_func (image, image->common.destroy_data); - - pixman_region32_fini (&common->clip_region); - - if (common->transform) - free (common->transform); - - if (common->filter_params) - free (common->filter_params); - - if (common->alpha_map) - pixman_image_unref ((pixman_image_t *)common->alpha_map); - - if (image->type == LINEAR || - image->type == RADIAL || - image->type == CONICAL) - { - if (image->gradient.stops) - free (image->gradient.stops); - } - - if (image->type == BITS && image->bits.free_me) - free (image->bits.free_me); - free (image); - return TRUE; } @@ -238,54 +238,27 @@ _pixman_image_reset_clip_region (pixman_image_t *image) image->common.have_clip_region = FALSE; } -static pixman_bool_t out_of_bounds_workaround = TRUE; - -/* Old X servers rely on out-of-bounds accesses when they are asked - * to composite with a window as the source. They create a pixman image - * pointing to some bogus position in memory, but then they set a clip - * region to the position where the actual bits are. +/* Executive Summary: This function is a no-op that only exists + * for historical reasons. + * + * There used to be a bug in the X server where it would rely on + * out-of-bounds accesses when it was asked to composite with a + * window as the source. It would create a pixman image pointing + * to some bogus position in memory, but then set a clip region + * to the position where the actual bits were. * * Due to a bug in old versions of pixman, where it would not clip * against the image bounds when a clip region was set, this would - * actually work. So by default we allow certain out-of-bound access - * to happen unless explicitly disabled. + * actually work. So when the pixman bug was fixed, a workaround was + * added to allow certain out-of-bound accesses. This function disabled + * those workarounds. * - * Fixed X servers should call this function to disable the workaround. + * Since 0.21.2, pixman doesn't do these workarounds anymore, so now + * this function is a no-op. */ PIXMAN_EXPORT void pixman_disable_out_of_bounds_workaround (void) { - out_of_bounds_workaround = FALSE; -} - -static pixman_bool_t -source_image_needs_out_of_bounds_workaround (bits_image_t *image) -{ - if (image->common.clip_sources && - image->common.repeat == PIXMAN_REPEAT_NONE && - image->common.have_clip_region && - out_of_bounds_workaround) - { - if (!image->common.client_clip) - { - /* There is no client clip, so if the clip region extends beyond the - * drawable geometry, it must be because the X server generated the - * bogus clip region. 
- */ - const pixman_box32_t *extents = - pixman_region32_extents (&image->common.clip_region); - - if (extents->x1 >= 0 && extents->x2 <= image->width && - extents->y1 >= 0 && extents->y2 <= image->height) - { - return FALSE; - } - } - - return TRUE; - } - - return FALSE; } static void @@ -315,8 +288,24 @@ compute_image_info (pixman_image_t *image) if (image->common.transform->matrix[0][1] == 0 && image->common.transform->matrix[1][0] == 0) { + if (image->common.transform->matrix[0][0] == -pixman_fixed_1 && + image->common.transform->matrix[1][1] == -pixman_fixed_1) + { + flags |= FAST_PATH_ROTATE_180_TRANSFORM; + } flags |= FAST_PATH_SCALE_TRANSFORM; } + else if (image->common.transform->matrix[0][0] == 0 && + image->common.transform->matrix[1][1] == 0) + { + pixman_fixed_t m01 = image->common.transform->matrix[0][1]; + pixman_fixed_t m10 = image->common.transform->matrix[1][0]; + + if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1) + flags |= FAST_PATH_ROTATE_90_TRANSFORM; + else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1) + flags |= FAST_PATH_ROTATE_270_TRANSFORM; + } } if (image->common.transform->matrix[0][0] > 0) @@ -338,11 +327,56 @@ compute_image_info (pixman_image_t *image) case PIXMAN_FILTER_GOOD: case PIXMAN_FILTER_BEST: flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); + + /* Here we have a chance to optimize BILINEAR filter to NEAREST if + * they are equivalent for the currently used transformation matrix. + */ + if (flags & FAST_PATH_ID_TRANSFORM) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + else if ( + /* affine and integer translation components in matrix ... */ + ((flags & FAST_PATH_AFFINE_TRANSFORM) && + !pixman_fixed_frac (image->common.transform->matrix[0][2] | + image->common.transform->matrix[1][2])) && + ( + /* ... combined with a simple rotation */ + (flags & (FAST_PATH_ROTATE_90_TRANSFORM | + FAST_PATH_ROTATE_180_TRANSFORM | + FAST_PATH_ROTATE_270_TRANSFORM)) || + /* ... or combined with a simple non-rotated translation */ + (image->common.transform->matrix[0][0] == pixman_fixed_1 && + image->common.transform->matrix[1][1] == pixman_fixed_1 && + image->common.transform->matrix[0][1] == 0 && + image->common.transform->matrix[1][0] == 0) + ) + ) + { + /* FIXME: there are some affine-test failures, showing that + * handling of BILINEAR and NEAREST filter is not quite + * equivalent when getting close to 32K for the translation + * components of the matrix. That's likely some bug, but for + * now just skip BILINEAR->NEAREST optimization in this case. 
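+ *
+ * (Editor's note: pixman_int_to_fixed (30000) is 30000 << 16 = 1966080000,
+ * already close to INT32_MAX, and 32768 cannot be represented in 16.16
+ * fixed point at all, which is why trouble shows up near the 32K mark.)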
+ */ + pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + } break; case PIXMAN_FILTER_CONVOLUTION: break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER; + break; + default: flags |= FAST_PATH_NO_CONVOLUTION_FILTER; break; @@ -408,6 +442,7 @@ compute_image_info (pixman_image_t *image) else { code = image->bits.format; + flags |= FAST_PATH_BITS_IMAGE; } if (!PIXMAN_FORMAT_A (image->bits.format) && @@ -420,9 +455,6 @@ compute_image_info (pixman_image_t *image) flags |= FAST_PATH_IS_OPAQUE; } - if (source_image_needs_out_of_bounds_workaround (&image->bits)) - flags |= FAST_PATH_NEEDS_WORKAROUND; - if (image->bits.read_func || image->bits.write_func) flags &= ~FAST_PATH_NO_ACCESSORS; @@ -445,6 +477,7 @@ compute_image_info (pixman_image_t *image) /* Fall through */ + case CONICAL: case LINEAR: code = PIXMAN_unknown; @@ -486,8 +519,9 @@ compute_image_info (pixman_image_t *image) * if all channels are opaque, so we simply turn it off * unconditionally for those images. */ - if (image->common.alpha_map || - image->common.filter == PIXMAN_FILTER_CONVOLUTION || + if (image->common.alpha_map || + image->common.filter == PIXMAN_FILTER_CONVOLUTION || + image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION || image->common.component_alpha) { flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE); @@ -509,7 +543,8 @@ _pixman_image_validate (pixman_image_t *image) * property_changed() can make use of the flags * to set up accessors etc. 
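 * (Editor's note: common.dirty is raised again by image_property_changed()
 * whenever a property is modified, so for images that have not changed
 * since their last use this whole validation step is skipped.)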
*/ - image->common.property_changed (image); + if (image->common.property_changed) + image->common.property_changed (image); image->common.dirty = FALSE; } @@ -590,7 +625,7 @@ pixman_image_set_transform (pixman_image_t * image, if (common->transform == transform) return TRUE; - if (memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) + if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) { free (common->transform); common->transform = NULL; @@ -599,6 +634,12 @@ pixman_image_set_transform (pixman_image_t * image, goto out; } + if (common->transform && + memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0) + { + return TRUE; + } + if (common->transform == NULL) common->transform = malloc (sizeof (pixman_transform_t)); @@ -623,6 +664,9 @@ PIXMAN_EXPORT void pixman_image_set_repeat (pixman_image_t *image, pixman_repeat_t repeat) { + if (image->common.repeat == repeat) + return; + image->common.repeat = repeat; image_property_changed (image); @@ -640,6 +684,19 @@ pixman_image_set_filter (pixman_image_t * image, if (params == common->filter_params && filter == common->filter) return TRUE; + if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION) + { + int width = pixman_fixed_to_int (params[0]); + int height = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int n_x_phases = (1 << x_phase_bits); + int n_y_phases = (1 << y_phase_bits); + + return_val_if_fail ( + n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE); + } + new_params = NULL; if (params) { @@ -667,6 +724,9 @@ PIXMAN_EXPORT void pixman_image_set_source_clipping (pixman_image_t *image, pixman_bool_t clip_sources) { + if (image->common.clip_sources == clip_sources) + return; + image->common.clip_sources = clip_sources; image_property_changed (image); @@ -682,6 +742,9 @@ pixman_image_set_indexed (pixman_image_t * image, { bits_image_t *bits = (bits_image_t *)image; + if (bits->indexed == indexed) + return; + bits->indexed = indexed; image_property_changed (image); @@ -744,6 +807,9 @@ PIXMAN_EXPORT void pixman_image_set_component_alpha (pixman_image_t *image, pixman_bool_t component_alpha) { + if (image->common.component_alpha == component_alpha) + return; + image->common.component_alpha = component_alpha; image_property_changed (image); @@ -822,19 +888,47 @@ pixman_image_get_format (pixman_image_t *image) if (image->type == BITS) return image->bits.format; - return 0; + return PIXMAN_null; } uint32_t -_pixman_image_get_solid (pixman_image_t * image, - pixman_format_code_t format) +_pixman_image_get_solid (pixman_implementation_t *imp, + pixman_image_t * image, + pixman_format_code_t format) { uint32_t result; - _pixman_image_get_scanline_32 (image, 0, 0, 1, &result, NULL); + if (image->type == SOLID) + { + result = image->solid.color_32; + } + else if (image->type == BITS) + { + if (image->bits.format == PIXMAN_a8r8g8b8) + result = image->bits.bits[0]; + else if (image->bits.format == PIXMAN_x8r8g8b8) + result = image->bits.bits[0] | 0xff000000; + else if (image->bits.format == PIXMAN_a8) + result = (*(uint8_t *)image->bits.bits) << 24; + else + goto otherwise; + } + else + { + pixman_iter_t iter; + + otherwise: + _pixman_implementation_src_iter_init ( + imp, &iter, image, 0, 0, 1, 1, + (uint8_t *)&result, + ITER_NARROW, image->common.flags); + + result = *iter.get_scanline (&iter, NULL); + } /* If necessary, convert RGB <--> BGR. 
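 * PIXMAN_TYPE_ARGB_SRGB is exempted below as well (editor's note), since
 * those formats store their channels in the same ARGB order.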
*/ - if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB) + if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB + && PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB) { result = (((result & 0xff000000) >> 0) | ((result & 0x00ff0000) >> 16) | diff --git a/programs/develop/libraries/pixman/pixman-implementation.c b/programs/develop/libraries/pixman/pixman-implementation.c index bc3749ef59..0c97cd3a27 100644 --- a/programs/develop/libraries/pixman/pixman-implementation.c +++ b/programs/develop/libraries/pixman/pixman-implementation.c @@ -27,168 +27,206 @@ #include #include "pixman-private.h" -static void -delegate_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - _pixman_implementation_combine_32 (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - _pixman_implementation_combine_64 (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - _pixman_implementation_combine_32_ca (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - _pixman_implementation_combine_64_ca (imp->delegate, - op, dest, src, mask, width); -} - -static pixman_bool_t -delegate_blt (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - return _pixman_implementation_blt ( - imp->delegate, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, - width, height); -} - -static pixman_bool_t -delegate_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); -} - pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *delegate, +_pixman_implementation_create (pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths) { - pixman_implementation_t *imp = malloc (sizeof (pixman_implementation_t)); - pixman_implementation_t *d; - int i; - - if (!imp) - return NULL; + pixman_implementation_t *imp; assert (fast_paths); - /* Make sure the whole delegate chain has the right toplevel */ - imp->delegate = delegate; - for (d = imp; d != NULL; d = d->delegate) - d->toplevel = imp; - - /* Fill out function pointers with ones that just delegate - */ - imp->blt = delegate_blt; - imp->fill = delegate_fill; - - for (i = 0; i < PIXMAN_N_OPERATORS; ++i) + if ((imp = malloc (sizeof (pixman_implementation_t)))) { - imp->combine_32[i] = delegate_combine_32; - imp->combine_64[i] = delegate_combine_64; - imp->combine_32_ca[i] = delegate_combine_32_ca; - imp->combine_64_ca[i] = delegate_combine_64_ca; + pixman_implementation_t *d; + + memset (imp, 0, sizeof *imp); + + imp->fallback = fallback; + imp->fast_paths = fast_paths; + + /* Make sure the whole fallback chain has the right toplevel */ + for (d = imp; d != NULL; d = 
d->fallback) + d->toplevel = imp; } - imp->fast_paths = fast_paths; - return imp; } -void -_pixman_implementation_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +#define N_CACHED_FAST_PATHS 8 + +typedef struct +{ + struct + { + pixman_implementation_t * imp; + pixman_fast_path_t fast_path; + } cache [N_CACHED_FAST_PATHS]; +} cache_t; + +PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); + +static void +dummy_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - (*imp->combine_32[op]) (imp, op, dest, src, mask, width); } void -_pixman_implementation_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func) { - (*imp->combine_64[op]) (imp, op, dest, src, mask, width); + pixman_implementation_t *imp; + cache_t *cache; + int i; + + /* Check cache for fast paths */ + cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); + + for (i = 0; i < N_CACHED_FAST_PATHS; ++i) + { + const pixman_fast_path_t *info = &(cache->cache[i].fast_path); + + /* Note that we check for equality here, not whether + * the cached fast path matches. This is to prevent + * us from selecting an overly general fast path + * when a more specific one would work. + */ + if (info->op == op && + info->src_format == src_format && + info->mask_format == mask_format && + info->dest_format == dest_format && + info->src_flags == src_flags && + info->mask_flags == mask_flags && + info->dest_flags == dest_flags && + info->func) + { + *out_imp = cache->cache[i].imp; + *out_func = cache->cache[i].fast_path.func; + + goto update_cache; + } + } + + for (imp = toplevel; imp != NULL; imp = imp->fallback) + { + const pixman_fast_path_t *info = imp->fast_paths; + + while (info->op != PIXMAN_OP_NONE) + { + if ((info->op == op || info->op == PIXMAN_OP_any) && + /* Formats */ + ((info->src_format == src_format) || + (info->src_format == PIXMAN_any)) && + ((info->mask_format == mask_format) || + (info->mask_format == PIXMAN_any)) && + ((info->dest_format == dest_format) || + (info->dest_format == PIXMAN_any)) && + /* Flags */ + (info->src_flags & src_flags) == info->src_flags && + (info->mask_flags & mask_flags) == info->mask_flags && + (info->dest_flags & dest_flags) == info->dest_flags) + { + *out_imp = imp; + *out_func = info->func; + + /* Set i to the last spot in the cache so that the + * move-to-front code below will work + */ + i = N_CACHED_FAST_PATHS - 1; + + goto update_cache; + } + + ++info; + } + } + + /* We should never reach this point */ + _pixman_log_error ( + FUNC, + "No composite function found\n" + "\n" + "The most likely cause of this is that this system has issues with\n" + "thread local storage\n"); + + *out_imp = NULL; + *out_func = dummy_composite_rect; + return; + +update_cache: + if (i) + { + while (i--) + cache->cache[i + 1] = cache->cache[i]; + + cache->cache[0].imp = *out_imp; + cache->cache[0].fast_path.op = op; + cache->cache[0].fast_path.src_format = src_format; + cache->cache[0].fast_path.src_flags = src_flags; + cache->cache[0].fast_path.mask_format = mask_format; + 
cache->cache[0].fast_path.mask_flags = mask_flags; + cache->cache[0].fast_path.dest_format = dest_format; + cache->cache[0].fast_path.dest_flags = dest_flags; + cache->cache[0].fast_path.func = *out_func; + } } -void -_pixman_implementation_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +static void +dummy_combine (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) { - (*imp->combine_32_ca[op]) (imp, op, dest, src, mask, width); } -void -_pixman_implementation_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +pixman_combine_32_func_t +_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, + pixman_op_t op, + pixman_bool_t component_alpha, + pixman_bool_t narrow) { - (*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width); + while (imp) + { + pixman_combine_32_func_t f = NULL; + + switch ((narrow << 1) | component_alpha) + { + case 0: /* not narrow, not component alpha */ + f = (pixman_combine_32_func_t)imp->combine_float[op]; + break; + + case 1: /* not narrow, component_alpha */ + f = (pixman_combine_32_func_t)imp->combine_float_ca[op]; + break; + + case 2: /* narrow, not component alpha */ + f = imp->combine_32[op]; + break; + + case 3: /* narrow, component_alpha */ + f = imp->combine_32_ca[op]; + break; + } + + if (f) + return f; + + imp = imp->fallback; + } + + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known combine function\n"); + return dummy_combine; } pixman_bool_t @@ -201,14 +239,25 @@ _pixman_implementation_blt (pixman_implementation_t * imp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { - return (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, - width, height); + while (imp) + { + if (imp->blt && + (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y, + width, height)) + { + return TRUE; + } + + imp = imp->fallback; + } + + return FALSE; } pixman_bool_t @@ -220,8 +269,130 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { - return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor); + while (imp) + { + if (imp->fill && + ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler))) + { + return TRUE; + } + + imp = imp->fallback; + } + + return FALSE; } +pixman_bool_t +_pixman_implementation_src_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + iter->image = image; + iter->buffer = (uint32_t *)buffer; + iter->x = x; + iter->y = y; + iter->width = width; + iter->height = height; + iter->iter_flags = iter_flags; + iter->image_flags = image_flags; + + while (imp) + { + if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter)) + return TRUE; + + imp = imp->fallback; + } + + return FALSE; +} + +pixman_bool_t +_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + 
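The blt, fill and iterator-init wrappers above all use the same delegation pattern: try the current implementation and, if it declines, move on to its fallback. A standalone sketch of that pattern with simplified types and illustrative names (the dest_iter_init body continues below):

typedef struct impl impl_t;

struct impl
{
    impl_t *fallback;                          /* next, more general impl */
    int   (*try_op) (impl_t *imp, void *args); /* nonzero on success */
};

/* Walk the chain until some implementation accepts the operation; the
 * last implementation in the chain is expected to handle everything. */
static int
run_with_fallback (impl_t *imp, void *args)
{
    while (imp)
    {
        if (imp->try_op && imp->try_op (imp, args))
            return 1;

        imp = imp->fallback;
    }

    return 0;
}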
iter->image = image; + iter->buffer = (uint32_t *)buffer; + iter->x = x; + iter->y = y; + iter->width = width; + iter->height = height; + iter->iter_flags = iter_flags; + iter->image_flags = image_flags; + + while (imp) + { + if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter)) + return TRUE; + + imp = imp->fallback; + } + + return FALSE; +} + +pixman_bool_t +_pixman_disabled (const char *name) +{ + const char *env; + + if ((env = getenv ("PIXMAN_DISABLE"))) + { + do + { + const char *end; + int len; + + if ((end = strchr (env, ' '))) + len = end - env; + else + len = strlen (env); + + if (strlen (name) == len && strncmp (name, env, len) == 0) + { + printf ("pixman: Disabled %s implementation\n", name); + return TRUE; + } + + env += len; + } + while (*env++); + } + + return FALSE; +} + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + + if (!_pixman_disabled ("fast")) + imp = _pixman_implementation_create_fast_path (imp); + + imp = _pixman_x86_get_implementations (imp); + + imp = _pixman_implementation_create_noop (imp); + + return imp; +} diff --git a/programs/develop/libraries/pixman/pixman-inlines.h b/programs/develop/libraries/pixman/pixman-inlines.h new file mode 100644 index 0000000000..dd1c2f17f0 --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-inlines.h @@ -0,0 +1,1339 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +/* Flags describing input parameters to fast path macro template. + * Turning on some flag values may indicate that + * "some property X is available so template can use this" or + * "some property X should be handled by template". + * + * FLAG_HAVE_SOLID_MASK + * Input mask is solid so template should handle this. + * + * FLAG_HAVE_NON_SOLID_MASK + * Input mask is bits mask so template should handle this. + * + * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually + * exclusive. 
(It's not allowed to turn both flags on) + */ +#define FLAG_NONE (0) +#define FLAG_HAVE_SOLID_MASK (1 << 1) +#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) + +/* To avoid too short repeated scanline function calls, extend source + * scanlines having width less than below constant value. + */ +#define REPEAT_NORMAL_MIN_WIDTH 64 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +static force_inline int +pixman_fixed_to_bilinear_weight (pixman_fixed_t x) +{ + return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & + ((1 << BILINEAR_INTERPOLATION_BITS) - 1); +} + +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - (disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else +#if SIZEOF_LONG > 4 + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} + +#else + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + 
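/* Note: distx and disty are rescaled here to 8-bit range, so the four
 * weights computed next satisfy
 *   distxy + distxiy + distixy + distixiy = 256 * 256,
 * i.e. the blend is a true weighted average and cannot overshoot. */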
disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ + distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ + distixiy = + 256 * 256 - (disty << 8) - + (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ + + /* Blue */ + r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + + /* Green */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + r |= f & 0x00ff0000; + + /* Alpha */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + return r; +} + +#endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4 + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. + * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. 
*/ +#define GET_0565_ALPHA(s) 0xff +#define GET_x888_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t src_width_fixed, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = pixman_fixed_to_int (vx); \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= 0) \ + vx -= src_width_fixed; \ + } \ + s1 = *(src + x1); \ + \ + x2 = pixman_fixed_to_int (vx); \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= 0) \ + vx -= src_width_fixed; \ + } \ + s2 = *(src + x2); \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \ + a2 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ + } \ + } \ + \ + if (w & 1) \ + { \ + x1 = pixman_fixed_to_int (vx); \ + s1 = *(src + x1); \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + \ + if (a1 == 0xff) \ + { \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + } \ + } \ +} + +#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ +static void \ +fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ + \ + 
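Stripped of the OVER handling, repeat modes and two-pixel unrolling, the core of the scanline template above is just fixed-point stepping through the source row. A standalone sketch with simplified names, assuming all samples land inside the source (the 'cover' case); the main-loop macro's declarations continue below:

#include <stdint.h>

typedef int32_t fixed_16_16_t;                 /* 16.16 fixed point */
#define FIXED_TO_INT(f) ((int) ((f) >> 16))

static void
scale_row_nearest (uint32_t *dst, const uint32_t *src, int width,
                   fixed_16_16_t vx, fixed_16_16_t unit_x)
{
    while (width--)
    {
        *dst++ = src[FIXED_TO_INT (vx)];       /* nearest source sample */
        vx += unit_x;                          /* advance in source space */
    }
}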
src_type_t *src; \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (have_mask) \ + { \ + if (mask_is_solid) \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + else \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + max_vy = pixman_int_to_fixed (src_image->bits.height); \ + \ + /* Clamp repeating positions inside the actual samples */ \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + if (have_mask && !mask_is_solid) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y = pixman_fixed_to_int (vy); \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, \ + src + src_image->bits.width - src_image->bits.width + 1, \ + left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src + src_image->bits.width, width, \ + vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 
0 : left_pad + width), \ + dst + left_pad + width, src + src_image->bits.width, \ + right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + static const src_type_t zero[1] = { 0 }; \ + if (y < 0 || y >= src_image->bits.height) \ + { \ + scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + continue; \ + } \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, zero + 1, left_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src + src_image->bits.width, width, \ + vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, zero + 1, right_pad, \ + -pixman_fixed_e, 0, src_width_fixed, TRUE); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \ + unit_x, src_width_fixed, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ + FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) + +#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + static force_inline void \ + scanline_func##scale_func_name##_wrapper ( \ + const uint8_t *mask, \ + dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ + { \ + scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ + } \ + FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ + src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ + dst_type_t, repeat_mode) + +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + +#define SCALED_NEAREST_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | 
\ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ + PIXMAN_solid, 
MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +/* + * Identify 5 zones in each scanline for bilinear scaling. Depending on + * whether 2 pixels to be interpolated are fetched from the image itself, + * from the padding area around it or from both image and padding area. + */ +static force_inline void +bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * left_pad, + int32_t * left_tz, + int32_t * width, + int32_t * right_tz, + int32_t * right_pad) +{ + int width1 = *width, left_pad1, right_pad1; + int width2 = *width, left_pad2, right_pad2; + + pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, + &width1, &left_pad1, &right_pad1); + pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, + unit_x, &width2, &left_pad2, &right_pad2); + + *left_pad = left_pad2; + *left_tz = left_pad1 - left_pad2; + *right_tz = right_pad2 - right_pad1; + *right_pad = right_pad1; + *width -= *left_pad + *left_tz + *right_tz + *right_pad; +} + +/* + * Main loop template for single pass bilinear scaling. It needs to be + * provided with 'scanline_func' which should do the compositing operation. + * The needed function has the following prototype: + * + * scanline_func (dst_type_t * dst, + * const mask_type_ * mask, + * const src_type_t * src_top, + * const src_type_t * src_bottom, + * int32_t width, + * int weight_top, + * int weight_bottom, + * pixman_fixed_t vx, + * pixman_fixed_t unit_x, + * pixman_fixed_t max_vx, + * pixman_bool_t zero_src) + * + * Where: + * dst - destination scanline buffer for storing results + * mask - mask buffer (or single value for solid mask) + * src_top, src_bottom - two source scanlines + * width - number of pixels to process + * weight_top - weight of the top row for interpolation + * weight_bottom - weight of the bottom row for interpolation + * vx - initial position for fetching the first pair of + * pixels from the source buffer + * unit_x - position increment needed to move to the next pair + * of pixels + * max_vx - image size as a fixed point value, can be used for + * implementing NORMAL repeat (when it is supported) + * zero_src - boolean hint variable, which is set to TRUE when + * all source pixels are fetched from zero padding + * zone for NONE repeat + * + * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to + * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that + * for NONE repeat when handling fuzzy antialiased top or bottom image + * edges. 
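(For NONE repeat, rows that fall outside the source are simply given weight zero, and the reduced total weight is what fades the edge out.)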
Also both top and bottom weight variables are guaranteed to + * have value, which is less than BILINEAR_INTERPOLATION_RANGE. + * For example, the weights can fit into unsigned byte or be used + * with 8-bit SIMD multiplication instructions for 8-bit interpolation + * precision. + */ +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ +static void \ +fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y1, y2; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, left_tz, right_tz, right_pad; \ + \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + int src_width; \ + pixman_fixed_t src_width_fixed; \ + int max_x; \ + pixman_bool_t need_src_extension; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (flags & FLAG_HAVE_SOLID_MASK) \ + { \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + mask_stride = 0; \ + } \ + else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + v.vector[0] -= pixman_fixed_1 / 2; \ + v.vector[1] -= pixman_fixed_1 / 2; \ + \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ + &left_pad, &left_tz, &width, &right_tz, &right_pad); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + /* PAD repeat does not need special handling for 'transition zones' and */ \ + /* they can be combined with 'padding zones' safely */ \ + left_pad += left_tz; \ + right_pad += right_tz; \ + left_tz = right_tz = 0; \ + } \ + v.vector[0] += left_pad * unit_x; \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + vx = v.vector[0]; \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ + max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \ + \ + if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ + { \ + src_width = 0; \ + \ + while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ + src_width += src_image->bits.width; \ + \ + need_src_extension = TRUE; \ + } \ + else \ + { \ + src_width = src_image->bits.width; \ + need_src_extension = FALSE; \ + } \ + \ + 
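A standalone sketch of the source-line extension being set up here: a narrow NORMAL-repeat source is replicated whole until it reaches REPEAT_NORMAL_MIN_WIDTH, so the inner loop gets long runs between wrap-arounds (simplified: the max_x early-out above is ignored, and names are illustrative):

#include <stdint.h>

#define REPEAT_NORMAL_MIN_WIDTH 64

/* Caller guarantees 0 < src_width < REPEAT_NORMAL_MIN_WIDTH.
 * Returns the extended width, always a whole multiple of src_width. */
static int
extend_source_line (const uint32_t *src, int src_width,
                    uint32_t dst[2 * REPEAT_NORMAL_MIN_WIDTH])
{
    int i = 0, j;

    while (i < REPEAT_NORMAL_MIN_WIDTH)
    {
        for (j = 0; j < src_width; j++)
            dst[i++] = src[j];
    }

    return i;
}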
src_width_fixed = pixman_int_to_fixed (src_width); \ + } \ + \ + while (--height >= 0) \ + { \ + int weight1, weight2; \ + dst = dst_line; \ + dst_line += dst_stride; \ + vx = v.vector[0]; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y1 = pixman_fixed_to_int (vy); \ + weight2 = pixman_fixed_to_bilinear_weight (vy); \ + if (weight2) \ + { \ + /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \ + y2 = y1 + 1; \ + weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \ + } \ + else \ + { \ + /* set both top and bottom row to the same scanline and tweak weights */ \ + y2 = y1; \ + weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \ + } \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[0]; \ + buf2[0] = buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ + buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ + if (y1 < 0) \ + { \ + weight1 = 0; \ + y1 = 0; \ + } \ + if (y1 >= src_image->bits.height) \ + { \ + weight1 = 0; \ + y1 = src_image->bits.height - 1; \ + } \ + if (y2 < 0) \ + { \ + weight2 = 0; \ + y2 = 0; \ + } \ + if (y2 >= src_image->bits.height) \ + { \ + weight2 = 0; \ + y2 = src_image->bits.height - 1; \ + } \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (left_tz > 0) \ + { \ + buf1[0] = 0; \ + buf1[1] = src1[0]; \ + buf2[0] = 0; \ + buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_tz, weight1, weight2, \ + pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += left_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_tz; \ + vx += left_tz * unit_x; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + vx += width * unit_x; \ + } \ + if (right_tz > 0) \ + { \ + buf1[0] = src1[src_image->bits.width - 1]; \ + buf1[1] = 0; \ + buf2[0] = src2[src_image->bits.width - 1]; \ + buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_tz, weight1, weight2, \ 
+ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += right_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += right_tz; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + int32_t num_pixels; \ + int32_t width_remain; \ + src_type_t * src_line_top; \ + src_type_t * src_line_bottom; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ + src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ + int i, j; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ + src_line_top = src_first_line + src_stride * y1; \ + src_line_bottom = src_first_line + src_stride * y2; \ + \ + if (need_src_extension) \ + { \ + for (i=0; i<src_width;) \ + { \ + for (j=0; j<src_image->bits.width; j++, i++) \ + { \ + extended_src_line0[i] = src_line_top[j]; \ + extended_src_line1[i] = src_line_bottom[j]; \ + } \ + } \ + \ + src_line_top = &extended_src_line0[0]; \ + src_line_bottom = &extended_src_line1[0]; \ + } \ + \ + /* Top & Bottom wrap around buffer */ \ + buf1[0] = src_line_top[src_width - 1]; \ + buf1[1] = src_line_top[0]; \ + buf2[0] = src_line_bottom[src_width - 1]; \ + buf2[1] = src_line_bottom[0]; \ + \ + width_remain = width; \ + \ + while (width_remain > 0) \ + { \ + /* We use src_width_fixed because it can make vx in original source range */ \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + \ + /* Wrap around part */ \ + if (pixman_fixed_to_int (vx) == src_width - 1) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow. \ + */ \ + num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, buf1, buf2, num_pixels, \ + weight1, weight2, pixman_fixed_frac(vx), \ + unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + } \ + \ + /* Normal scanline composite */ \ + if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow here.
\ + */ \ + num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ + / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ + weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + } \ + } \ + } \ + else \ + { \ + scanline_func (dst, mask, src_first_line + src_stride * y1, \ + src_first_line + src_stride * y2, width, \ + weight1, weight2, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ + FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ + dst_type_t, repeat_mode, flags) + +#define SCALED_BILINEAR_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_BILINEAR_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, 
\ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#endif diff --git a/programs/develop/libraries/pixman/pixman-linear-gradient.c b/programs/develop/libraries/pixman/pixman-linear-gradient.c index b048e7b065..40c8c9f37d 100644 --- a/programs/develop/libraries/pixman/pixman-linear-gradient.c +++ b/programs/develop/libraries/pixman/pixman-linear-gradient.c @@ -31,36 +31,32 @@ #include <stdlib.h> #include "pixman-private.h" -static source_image_class_t -linear_gradient_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) +static pixman_bool_t +linear_gradient_is_horizontal (pixman_image_t *image, + int x, + int y, + int width, + int height) { - source_image_t *source = (source_image_t *)image; linear_gradient_t *linear = (linear_gradient_t *)image; pixman_vector_t v; pixman_fixed_32_32_t l;
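For context on how the SIMPLE_* entry macros above get used: an implementation hands a table of such entries, terminated by PIXMAN_OP_NONE, to _pixman_implementation_create, and _pixman_implementation_lookup_composite scans it. A sketch, assuming the scanline workers for a hypothetical example_8888_8888 scaler were generated with the FAST_BILINEAR_MAINLOOP_COMMON template (the gradient helper's declarations continue below):

static const pixman_fast_path_t example_fast_paths[] =
{
    /* expands to the cover/none/pad/normal variants */
    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, example_8888_8888),
    { PIXMAN_OP_NONE },
};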
pixman_fixed_48_16_t dx, dy; double inc; - source_image_class_t class; - class = SOURCE_IMAGE_CLASS_UNKNOWN; - - if (source->common.transform) + if (image->common.transform) { /* projective transformation */ - if (source->common.transform->matrix[2][0] != 0 || - source->common.transform->matrix[2][1] != 0 || - source->common.transform->matrix[2][2] == 0) + if (image->common.transform->matrix[2][0] != 0 || + image->common.transform->matrix[2][1] != 0 || + image->common.transform->matrix[2][2] == 0) { - return class; + return FALSE; } - v.vector[0] = source->common.transform->matrix[0][1]; - v.vector[1] = source->common.transform->matrix[1][1]; - v.vector[2] = source->common.transform->matrix[2][2]; + v.vector[0] = image->common.transform->matrix[0][1]; + v.vector[1] = image->common.transform->matrix[1][1]; + v.vector[2] = image->common.transform->matrix[2][2]; } else { @@ -75,7 +71,7 @@ linear_gradient_classify (pixman_image_t *image, l = dx * dx + dy * dy; if (l == 0) - return class; + return FALSE; /* * compute how much the input of the gradient walked changes @@ -87,43 +83,44 @@ linear_gradient_classify (pixman_image_t *image, /* check that casting to integer would result in 0 */ if (-1 < inc && inc < 1) - class = SOURCE_IMAGE_CLASS_HORIZONTAL; + return TRUE; - return class; + return FALSE; } -static void -linear_gradient_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static uint32_t * +linear_get_scanline_narrow (pixman_iter_t *iter, + const uint32_t *mask) { + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + pixman_vector_t v, unit; pixman_fixed_32_32_t l; pixman_fixed_48_16_t dx, dy; gradient_t *gradient = (gradient_t *)image; - source_image_t *source = (source_image_t *)image; linear_gradient_t *linear = (linear_gradient_t *)image; uint32_t *end = buffer + width; pixman_gradient_walker_t walker; - _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); /* reference point is the center of the pixel */ v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; - if (source->common.transform) + if (image->common.transform) { - if (!pixman_transform_point_3d (source->common.transform, &v)) - return; - - unit.vector[0] = source->common.transform->matrix[0][0]; - unit.vector[1] = source->common.transform->matrix[1][0]; - unit.vector[2] = source->common.transform->matrix[2][0]; + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + unit.vector[0] = image->common.transform->matrix[0][0]; + unit.vector[1] = image->common.transform->matrix[1][0]; + unit.vector[2] = image->common.transform->matrix[2][0]; } else { @@ -219,18 +216,48 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, v.vector[2] += unit.vector[2]; } } + + iter->y++; + + return iter->buffer; } -static void -linear_gradient_property_changed (pixman_image_t *image) +static uint32_t * +linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { - image->common.get_scanline_32 = linear_gradient_get_scanline_32; - image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; + uint32_t *buffer = linear_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return 
buffer; +} + +void +_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (linear_gradient_is_horizontal ( + iter->image, iter->x, iter->y, iter->width, iter->height)) + { + if (iter->iter_flags & ITER_NARROW) + linear_get_scanline_narrow (iter, NULL); + else + linear_get_scanline_wide (iter, NULL); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else + { + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = linear_get_scanline_narrow; + else + iter->get_scanline = linear_get_scanline_wide; + } } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_linear_gradient (pixman_point_fixed_t * p1, - pixman_point_fixed_t * p2, +pixman_image_create_linear_gradient (const pixman_point_fixed_t * p1, + const pixman_point_fixed_t * p2, const pixman_gradient_stop_t *stops, int n_stops) { @@ -254,8 +281,6 @@ pixman_image_create_linear_gradient (pixman_point_fixed_t * p1, linear->p2 = *p2; image->type = LINEAR; - image->common.classify = linear_gradient_classify; - image->common.property_changed = linear_gradient_property_changed; return image; } diff --git a/programs/develop/libraries/pixman/pixman-matrix.c b/programs/develop/libraries/pixman/pixman-matrix.c index abdfa05258..89b96826b8 100644 --- a/programs/develop/libraries/pixman/pixman-matrix.c +++ b/programs/develop/libraries/pixman/pixman-matrix.c @@ -25,7 +25,7 @@ */ #ifdef HAVE_CONFIG_H -#include "config.h" +#include <config.h> #endif #include <math.h> @@ -34,6 +34,338 @@ #define F(x) pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type).
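(The dividend is consumed 16 bits at a time, in effect grade-school long division in base 65536.)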
+ * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. 
+ */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. + */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT 
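A minimal sketch of the affine case described above, mirroring the pixman_transform_point_31_16_affine variant that follows: split each 48.16 coordinate into integer and fractional halves so the 64-bit products cannot overflow, then recombine with rounding (standalone types, illustrative names):

#include <stdint.h>

typedef int32_t fixed_16_16_t;   /* matrix entries, 16.16 */
typedef int64_t fixed_48_16_t;   /* coordinates, 48.16 */

static fixed_48_16_t
affine_row (const fixed_16_16_t row[3], fixed_48_16_t x, fixed_48_16_t y)
{
    int64_t hi, lo;

    hi  = (int64_t) row[0] * (x >> 16);      /* integer parts */
    lo  = (int64_t) row[0] * (x & 0xFFFF);   /* fractional parts */
    hi += (int64_t) row[1] * (y >> 16);
    lo += (int64_t) row[1] * (y & 0xFFFF);
    hi += row[2];                            /* translation, already 16.16 */

    return hi + ((lo + 0x8000) >> 16);       /* round to nearest 48.16 */
}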
void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + PIXMAN_EXPORT void pixman_transform_init_identity (struct pixman_transform *matrix) { @@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, struct pixman_vector * vector) { - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += partial >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - result.vector[j] = (pixman_fixed_t) v; - } - - *vector = result; + pixman_transform_point_31_16_3d (transform, &tmp, &tmp); - if (!result.vector[2]) - return FALSE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; - return TRUE; + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT 
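/*
 * Note how the wrappers here detect overflow: pixman_transform_point_3d
 * above and pixman_transform_point below compute in 48.16 precision,
 * store the results back into the caller's 16.16 pixman_fixed_t vector,
 * and then compare the narrowed values against the wide ones; any
 * mismatch means a result did not fit and FALSE is returned. A minimal
 * sketch of that round-trip check (hypothetical helper, not part of
 * pixman):
 *
 *     static pixman_bool_t
 *     narrow_48_16 (pixman_fixed_48_16_t wide, pixman_fixed_t *out)
 *     {
 *         *out = (pixman_fixed_t) wide;  // truncating store into 32 bits
 *         return *out == wide;           // TRUE only if the value fit
 *     }
 */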
pixman_bool_t pixman_transform_point (const struct pixman_transform *transform,
 struct pixman_vector * vector)
{
- pixman_fixed_32_32_t partial;
- pixman_fixed_34_30_t v[3];
- pixman_fixed_48_16_t quo;
- int i, j;
+ pixman_vector_48_16_t tmp;
+ tmp.v[0] = vector->vector[0];
+ tmp.v[1] = vector->vector[1];
+ tmp.v[2] = vector->vector[2];
- for (j = 0; j < 3; j++)
- {
- v[j] = 0;
-
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
- (pixman_fixed_32_32_t) vector->vector[i]);
- v[j] += partial >> 2;
- }
- }
-
- if (!(v[2] >> 16))
- return FALSE;
+ if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
+ return FALSE;
- for (j = 0; j < 2; j++)
- {
- quo = v[j] / (v[2] >> 16);
- if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
- return FALSE;
- vector->vector[j] = (pixman_fixed_t) quo;
- }
-
- vector->vector[2] = pixman_fixed_1;
- return TRUE;
+ vector->vector[0] = tmp.v[0];
+ vector->vector[1] = tmp.v[1];
+ vector->vector[2] = tmp.v[2];
+
+ return vector->vector[0] == tmp.v[0] &&
+ vector->vector[1] == tmp.v[1] &&
+ vector->vector[2] == tmp.v[2];
}
 PIXMAN_EXPORT pixman_bool_t
@@ -138,7 +442,7 @@ pixman_transform_multiply (struct pixman_transform * dst,
 (pixman_fixed_32_32_t) l->matrix[dy][o] *
 (pixman_fixed_32_32_t) r->matrix[o][dx];
- v += partial >> 16;
+ v += (partial + 0x8000) >> 16;
 }
 if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
@@ -336,14 +640,14 @@ PIXMAN_EXPORT pixman_bool_t
 pixman_transform_invert (struct pixman_transform * dst,
 const struct pixman_transform *src)
 {
- struct pixman_f_transform m, r;
+ struct pixman_f_transform m;
 pixman_f_transform_from_pixman_transform (&m, src);
- if (!pixman_f_transform_invert (&r, &m))
+ if (!pixman_f_transform_invert (&m, &m))
 return FALSE;
- if (!pixman_transform_from_pixman_f_transform (dst, &r))
+ if (!pixman_transform_from_pixman_f_transform (dst, &m))
 return FALSE;
 return TRUE;
@@ -425,7 +729,8 @@ pixman_transform_is_inverse (const struct pixman_transform *a,
 {
 struct pixman_transform t;
- pixman_transform_multiply (&t, a, b);
+ if (!pixman_transform_multiply (&t, a, b))
+ return FALSE;
 return pixman_transform_is_identity (&t);
 }
@@ -464,17 +769,15 @@ pixman_transform_from_pixman_f_transform (struct pixman_transform * t,
 return TRUE;
 }
-static const int a[3] = { 3, 3, 2 };
-static const int b[3] = { 2, 1, 1 };
-
 PIXMAN_EXPORT pixman_bool_t
 pixman_f_transform_invert (struct pixman_f_transform * dst,
 const struct pixman_f_transform *src)
 {
+ static const int a[3] = { 2, 2, 1 };
+ static const int b[3] = { 1, 0, 0 };
+
 pixman_f_transform_t d;
 double det;
 int i, j;
- static int a[3] = { 2, 2, 1 };
- static int b[3] = { 1, 0, 0 };
 det = 0;
 for (i = 0; i < 3; i++)
@@ -509,10 +812,12 @@ pixman_f_transform_invert (struct pixman_f_transform * dst,
 if (((i + j) & 1) != 0)
 p = -p;
- dst->m[j][i] = det * p;
+ d.m[j][i] = det * p;
 }
 }
+ *dst = d;
+
 return TRUE;
}
diff --git a/programs/develop/libraries/pixman/pixman-mmx.c b/programs/develop/libraries/pixman/pixman-mmx.c
index 34637a4fe3..14790c029f 100644
--- a/programs/develop/libraries/pixman/pixman-mmx.c
+++ b/programs/develop/libraries/pixman/pixman-mmx.c
@@ -33,13 +33,16 @@
 #include <config.h>
 #endif
-#ifdef USE_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
+#ifdef USE_LOONGSON_MMI
+#include <loongson-mmintrin.h>
+#else
+#include <mmintrin.h>
+#endif
 #include "pixman-private.h"
 #include "pixman-combine32.h"
-
-#define noVERBOSE
+#include "pixman-inlines.h"
 #ifdef VERBOSE
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__,
__LINE__)
@@ -47,6 +50,79 @@
 #define CHECKPOINT()
 #endif
+#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8
+/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+
+}
+#endif
+
+#ifdef USE_X86_MMX
+# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
+# include <xmmintrin.h>
+# else
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+ int ret;
+
+ asm ("pmovmskb %1, %0\n\t"
+ : "=r" (ret)
+ : "y" (__A)
+ );
+
+ return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ asm ("pmulhuw %1, %0\n\t"
+ : "+y" (__A)
+ : "y" (__B)
+ );
+ return __A;
+}
+
+# ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
+{
+ __m64 ret;
+
+ asm ("pshufw %2, %1, %0\n\t"
+ : "=y" (ret)
+ : "y" (__A), "K" (__N)
+ );
+
+ return ret;
+}
+# else
+# define _mm_shuffle_pi16(A, N) \
+ ({ \
+ __m64 ret; \
+ \
+ asm ("pshufw %2, %1, %0\n\t" \
+ : "=y" (ret) \
+ : "y" (A), "K" ((const int8_t)N) \
+ ); \
+ \
+ ret; \
+ })
+# endif
+# endif
+#endif
+
+#ifndef _MSC_VER
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+#endif
+
 /* Notes about writing mmx code
 *
 * give memory operands as the second operand. If you give it as the
@@ -68,17 +144,41 @@
 /* --------------- MMX primitives ------------------------------------- */
-#ifdef __GNUC__
+/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
+ * the name of the member used to access the data.
+ * If __m64 requires using mm_cvt* intrinsics functions to convert between
+ * uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
+ * If __m64 and uint64_t values can just be cast to each other directly,
+ * then define USE_M64_CASTS.
+ * If __m64 is a double datatype, then define USE_M64_DOUBLE.
+ */
+#ifdef _MSC_VER
+# define M64_MEMBER m64_u64
+#elif defined(__ICC)
+# define USE_CVT_INTRINSICS
+#elif defined(USE_LOONGSON_MMI)
+# define USE_M64_DOUBLE
+#elif defined(__GNUC__)
+# define USE_M64_CASTS
+#elif defined(__SUNPRO_C)
+# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
+/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
+ * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
+ * is defined. If it is used, then the mm_cvt* intrinsics must be used.
+ */
+# define USE_CVT_INTRINSICS
+# else
+/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
+ * disabled, __m64 is defined as a struct containing "unsigned long long l_".
+ */ +# define M64_MEMBER l_ +# endif +#endif + +#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE) typedef uint64_t mmxdatafield; #else typedef __m64 mmxdatafield; -/* If __m64 is defined as a struct or union, define M64_MEMBER to be the - name of the member used to access the data */ -# ifdef _MSC_VER -# define M64_MEMBER m64_u64 -# elif defined(__SUNPRO_C) -# define M64_MEMBER l_ -# endif #endif typedef struct @@ -87,24 +187,31 @@ typedef struct mmxdatafield mmx_4x0080; mmxdatafield mmx_565_rgb; mmxdatafield mmx_565_unpack_multiplier; + mmxdatafield mmx_565_pack_multiplier; mmxdatafield mmx_565_r; mmxdatafield mmx_565_g; mmxdatafield mmx_565_b; + mmxdatafield mmx_packed_565_rb; + mmxdatafield mmx_packed_565_g; + mmxdatafield mmx_expand_565_g; + mmxdatafield mmx_expand_565_b; + mmxdatafield mmx_expand_565_r; +#ifndef USE_LOONGSON_MMI mmxdatafield mmx_mask_0; mmxdatafield mmx_mask_1; mmxdatafield mmx_mask_2; mmxdatafield mmx_mask_3; +#endif mmxdatafield mmx_full_alpha; - mmxdatafield mmx_ffff0000ffff0000; - mmxdatafield mmx_0000ffff00000000; - mmxdatafield mmx_000000000000ffff; + mmxdatafield mmx_4x0101; + mmxdatafield mmx_ff000000; } mmx_data_t; #if defined(_MSC_VER) # define MMXDATA_INIT(field, val) { val ## UI64 } #elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */ # define MMXDATA_INIT(field, val) field = { val ## ULL } -#else /* __m64 is an integral type */ +#else /* mmxdatafield is an integral type */ # define MMXDATA_INIT(field, val) field = val ## ULL #endif @@ -114,25 +221,32 @@ static const mmx_data_t c = MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080), MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f), MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840), + MMXDATA_INIT (.mmx_565_pack_multiplier, 0x2000000420000004), MMXDATA_INIT (.mmx_565_r, 0x000000f800000000), MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000), MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8), + MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8), + MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00), + MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0), + MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f), + MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800), +#ifndef USE_LOONGSON_MMI MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000), MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff), MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff), MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff), +#endif MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000), - MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000), - MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000), - MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff), + MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101), + MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000), }; -#ifdef __GNUC__ -# ifdef __ICC -# define MC(x) to_m64 (c.mmx_ ## x) -# else -# define MC(x) ((__m64)c.mmx_ ## x) -# endif +#ifdef USE_CVT_INTRINSICS +# define MC(x) to_m64 (c.mmx_ ## x) +#elif defined(USE_M64_CASTS) +# define MC(x) ((__m64)c.mmx_ ## x) +#elif defined(USE_M64_DOUBLE) +# define MC(x) (*(__m64 *)&c.mmx_ ## x) #else # define MC(x) c.mmx_ ## x #endif @@ -140,14 +254,16 @@ static const mmx_data_t c = static force_inline __m64 to_m64 (uint64_t x) { -#ifdef __ICC +#ifdef USE_CVT_INTRINSICS return _mm_cvtsi64_m64 (x); #elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ __m64 res; res.M64_MEMBER = x; return res; -#else /* __m64 is an integral type */ +#elif defined USE_M64_DOUBLE + return *(__m64 *)&x; +#else /* 
USE_M64_CASTS */ return (__m64)x; #endif } @@ -155,12 +271,14 @@ to_m64 (uint64_t x) static force_inline uint64_t to_uint64 (__m64 x) { -#ifdef __ICC +#ifdef USE_CVT_INTRINSICS return _mm_cvtm64_si64 (x); #elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ uint64_t res = x.M64_MEMBER; return res; -#else /* __m64 is an integral type */ +#elif defined USE_M64_DOUBLE + return *(uint64_t *)&x; +#else /* USE_M64_CASTS */ return (uint64_t)x; #endif } @@ -190,8 +308,7 @@ pix_multiply (__m64 a, __m64 b) res = _mm_mullo_pi16 (a, b); res = _mm_adds_pu16 (res, MC (4x0080)); - res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8)); - res = _mm_srli_pi16 (res, 8); + res = _mm_mulhi_pu16 (res, MC (4x0101)); return res; } @@ -205,52 +322,19 @@ pix_add (__m64 a, __m64 b) static force_inline __m64 expand_alpha (__m64 pixel) { - __m64 t1, t2; - - t1 = shift (pixel, -48); - t2 = shift (t1, 16); - t1 = _mm_or_si64 (t1, t2); - t2 = shift (t1, 32); - t1 = _mm_or_si64 (t1, t2); - - return t1; + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3)); } static force_inline __m64 expand_alpha_rev (__m64 pixel) { - __m64 t1, t2; - - /* move alpha to low 16 bits and zero the rest */ - t1 = shift (pixel, 48); - t1 = shift (t1, -48); - - t2 = shift (t1, 16); - t1 = _mm_or_si64 (t1, t2); - t2 = shift (t1, 32); - t1 = _mm_or_si64 (t1, t2); - - return t1; + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0)); } static force_inline __m64 invert_colors (__m64 pixel) { - __m64 x, y, z; - - x = y = z = pixel; - - x = _mm_and_si64 (x, MC (ffff0000ffff0000)); - y = _mm_and_si64 (y, MC (000000000000ffff)); - z = _mm_and_si64 (z, MC (0000ffff00000000)); - - y = shift (y, 32); - z = shift (z, -32); - - x = _mm_or_si64 (x, y); - x = _mm_or_si64 (x, z); - - return x; + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2)); } static force_inline __m64 @@ -276,14 +360,6 @@ in (__m64 src, __m64 mask) return pix_multiply (src, mask); } -static force_inline __m64 -in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest) -{ - src = _mm_or_si64 (src, MC (full_alpha)); - - return over (in (src, mask), mask, dest); -} - #ifndef _MSC_VER static force_inline __m64 in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) @@ -298,10 +374,69 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) #endif -static force_inline __m64 -load8888 (uint32_t v) +/* Elemental unaligned loads */ + +static force_inline __m64 ldq_u(__m64 *p) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ()); +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *(__m64 *)p; +#elif defined USE_ARM_IWMMXT + int align = (uintptr_t)p & 7; + __m64 *aligned_p; + if (align == 0) + return *p; + aligned_p = (__m64 *)((uintptr_t)p & ~7); + return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align); +#else + struct __una_u64 { __m64 x __attribute__((packed)); }; + const struct __una_u64 *ptr = (const struct __una_u64 *) p; + return (__m64) ptr->x; +#endif +} + +static force_inline uint32_t ldl_u(const uint32_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. 
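 * On strict-alignment targets the #else branch below instead reads the
 * value through a __attribute__((packed)) struct, which forces the
 * compiler to emit alignment-safe load sequences. A memcpy-based sketch
 * of the same unaligned read (an equivalent formulation, not the code
 * used here):
 *
 *     #include <string.h>
 *
 *     static uint32_t ldl_u_sketch (const uint32_t *p)
 *     {
 *         uint32_t x;
 *         memcpy (&x, p, sizeof x);  // lowered to alignment-safe loads
 *         return x;
 *     }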
*/ + return *p; +#else + struct __una_u32 { uint32_t x __attribute__((packed)); }; + const struct __una_u32 *ptr = (const struct __una_u32 *) p; + return ptr->x; +#endif +} + +static force_inline __m64 +load (const uint32_t *v) +{ +#ifdef USE_LOONGSON_MMI + __m64 ret; + asm ("lwc1 %0, %1\n\t" + : "=f" (ret) + : "m" (*v) + ); + return ret; +#else + return _mm_cvtsi32_si64 (*v); +#endif +} + +static force_inline __m64 +load8888 (const uint32_t *v) +{ +#ifdef USE_LOONGSON_MMI + return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ()); +#else + return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ()); +#endif +} + +static force_inline __m64 +load8888u (const uint32_t *v) +{ + uint32_t l = ldl_u (v); + return load8888 (&l); } static force_inline __m64 @@ -310,10 +445,53 @@ pack8888 (__m64 lo, __m64 hi) return _mm_packs_pu16 (lo, hi); } -static force_inline uint32_t -store8888 (__m64 v) +static force_inline void +store (uint32_t *dest, __m64 v) { - return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())); +#ifdef USE_LOONGSON_MMI + asm ("swc1 %1, %0\n\t" + : "=m" (*dest) + : "f" (v) + : "memory" + ); +#else + *dest = _mm_cvtsi64_si32 (v); +#endif +} + +static force_inline void +store8888 (uint32_t *dest, __m64 v) +{ + v = pack8888 (v, _mm_setzero_si64 ()); + store (dest, v); +} + +static force_inline pixman_bool_t +is_equal (__m64 a, __m64 b) +{ +#ifdef USE_LOONGSON_MMI + /* __m64 is double, we can compare directly. */ + return a == b; +#else + return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff; +#endif +} + +static force_inline pixman_bool_t +is_opaque (__m64 v) +{ +#ifdef USE_LOONGSON_MMI + return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha)); +#else + __m64 ffs = _mm_cmpeq_pi8 (v, v); + return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40); +#endif +} + +static force_inline pixman_bool_t +is_zero (__m64 v) +{ + return is_equal (v, _mm_setzero_si64 ()); } /* Expand 16 bits positioned at @pos (0-3) of a mmx register into @@ -337,7 +515,11 @@ expand565 (__m64 pixel, int pos) __m64 t1, t2; /* move pixel to low 16 bit and zero the rest */ +#ifdef USE_LOONGSON_MMI + p = loongson_extract_pi16 (p, pos); +#else p = shift (shift (p, (3 - pos) * 16), -48); +#endif t1 = shift (p, 36 - 11); t2 = shift (p, 16 - 5); @@ -350,6 +532,36 @@ expand565 (__m64 pixel, int pos) return _mm_srli_pi16 (pixel, 8); } +/* Expand 4 16 bit pixels in an mmx register into two mmx registers of + * + * AARRGGBBRRGGBB + */ +static force_inline void +expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha) +{ + __m64 t0, t1, alpha = _mm_setzero_si64 (); + __m64 r = _mm_and_si64 (vin, MC (expand_565_r)); + __m64 g = _mm_and_si64 (vin, MC (expand_565_g)); + __m64 b = _mm_and_si64 (vin, MC (expand_565_b)); + if (full_alpha) + alpha = _mm_cmpeq_pi32 (alpha, alpha); + + /* Replicate high bits into empty low bits. 
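 * That is, each channel is widened by copying its top bits into the
 * newly opened low bits: r8 = (r5 << 3) | (r5 >> 2) and
 * b8 = (b5 << 3) | (b5 >> 2) for the 5-bit channels, and
 * g8 = (g6 << 2) | (g6 >> 4) for the 6-bit green channel, so that full
 * intensity 0x1f (or 0x3f) expands to exactly 0xff rather than 0xf8
 * (or 0xfc). For example, r5 = 0b10110 becomes r8 = 0b10110101.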
*/ + r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13)); + g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9)); + b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2)); + + r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); /* 00 00 00 00 R3 R2 R1 R0 */ + g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); /* 00 00 00 00 G3 G2 G1 G0 */ + b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); /* 00 00 00 00 B3 B2 B1 B0 */ + + t1 = _mm_unpacklo_pi8 (r, alpha); /* A3 R3 A2 R2 A1 R1 A0 R0 */ + t0 = _mm_unpacklo_pi8 (b, g); /* G3 B3 G2 B2 G1 B1 G0 B0 */ + + *vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */ + *vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */ +} + static force_inline __m64 expand8888 (__m64 in, int pos) { @@ -365,6 +577,17 @@ expandx888 (__m64 in, int pos) return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha)); } +static force_inline void +expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha) +{ + __m64 v0, v1; + expand_4xpacked565 (vin, &v0, &v1, full_alpha); + *vout0 = expand8888 (v0, 0); + *vout1 = expand8888 (v0, 1); + *vout2 = expand8888 (v1, 0); + *vout3 = expand8888 (v1, 1); +} + static force_inline __m64 pack_565 (__m64 pixel, __m64 target, int pos) { @@ -376,6 +599,15 @@ pack_565 (__m64 pixel, __m64 target, int pos) g = _mm_and_si64 (p, MC (565_g)); b = _mm_and_si64 (p, MC (565_b)); +#ifdef USE_LOONGSON_MMI + r = shift (r, -(32 - 8)); + g = shift (g, -(16 - 3)); + b = shift (b, -(0 + 3)); + + p = _mm_or_si64 (r, g); + p = _mm_or_si64 (p, b); + return loongson_insert_pi16 (t, p, pos); +#else r = shift (r, -(32 - 8) + pos * 16); g = shift (g, -(16 - 3) + pos * 16); b = shift (b, -(0 + 3) + pos * 16); @@ -393,10 +625,42 @@ pack_565 (__m64 pixel, __m64 target, int pos) p = _mm_or_si64 (g, p); return _mm_or_si64 (b, p); +#endif +} + +static force_inline __m64 +pack_4xpacked565 (__m64 a, __m64 b) +{ + __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb)); + __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb)); + + __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier)); + __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier)); + + __m64 g0 = _mm_and_si64 (a, MC (packed_565_g)); + __m64 g1 = _mm_and_si64 (b, MC (packed_565_g)); + + t0 = _mm_or_si64 (t0, g0); + t1 = _mm_or_si64 (t1, g1); + + t0 = shift(t0, -5); +#ifdef USE_ARM_IWMMXT + t1 = shift(t1, -5); + return _mm_packs_pu32 (t0, t1); +#else + t1 = shift(t1, -5 + 16); + return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0)); +#endif } #ifndef _MSC_VER +static force_inline __m64 +pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3) +{ + return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)); +} + static force_inline __m64 pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) { @@ -408,32 +672,52 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) #else +/* MSVC only handles a "pass by register" of up to three SSE intrinsics */ + +#define pack_4x565(v0, v1, v2, v3) \ + pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)) + #define pix_add_mul(x, a, y, b) \ ( x = pix_multiply (x, a), \ - y = pix_multiply (y, a), \ + y = pix_multiply (y, b), \ pix_add (x, y) ) #endif /* --------------- MMX code patch for fbcompose.c --------------------- */ -static force_inline uint32_t +static force_inline __m64 combine (const uint32_t *src, const uint32_t *mask) { - uint32_t ssrc = *src; + __m64 vsrc = load8888 (src); if (mask) { - __m64 m = load8888 (*mask); - __m64 s = load8888 (ssrc); + __m64 m = load8888 (mask); m = expand_alpha (m); - s = 
pix_multiply (s, m); - - ssrc = store8888 (s); + vsrc = pix_multiply (vsrc, m); } - return ssrc; + return vsrc; +} + +static force_inline __m64 +core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst) +{ + vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ()); + + if (is_opaque (vsrc)) + { + return vsrc; + } + else if (!is_zero (vsrc)) + { + return over (vsrc, expand_alpha (vsrc), + _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ())); + } + + return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()); } static void @@ -448,19 +732,16 @@ mmx_combine_over_u (pixman_implementation_t *imp, while (dest < end) { - uint32_t ssrc = combine (src, mask); - uint32_t a = ssrc >> 24; + __m64 vsrc = combine (src, mask); - if (a == 0xff) + if (is_opaque (vsrc)) { - *dest = ssrc; + store8888 (dest, vsrc); } - else if (ssrc) + else if (!is_zero (vsrc)) { - __m64 s, sa; - s = load8888 (ssrc); - sa = expand_alpha (s); - *dest = store8888 (over (s, sa, load8888 (*dest))); + __m64 sa = expand_alpha (vsrc); + store8888 (dest, over (vsrc, sa, load8888 (dest))); } ++dest; @@ -484,11 +765,11 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 d, da; - uint32_t s = combine (src, mask); + __m64 s = combine (src, mask); - d = load8888 (*dest); + d = load8888 (dest); da = expand_alpha (d); - *dest = store8888 (over (d, da, load8888 (s))); + store8888 (dest, over (d, da, s)); ++dest; ++src; @@ -510,14 +791,14 @@ mmx_combine_in_u (pixman_implementation_t *imp, while (dest < end) { - __m64 x, a; + __m64 a; + __m64 x = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + a = load8888 (dest); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -539,13 +820,13 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp, while (dest < end) { - __m64 x, a; + __m64 a = combine (src, mask); + __m64 x; - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -567,14 +848,14 @@ mmx_combine_out_u (pixman_implementation_t *imp, while (dest < end) { - __m64 x, a; + __m64 a; + __m64 x = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + a = load8888 (dest); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -596,15 +877,15 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp, while (dest < end) { - __m64 x, a; + __m64 a = combine (src, mask); + __m64 x; - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -626,15 +907,15 @@ mmx_combine_atop_u (pixman_implementation_t *imp, while (dest < end) { - __m64 s, da, d, sia; + __m64 da, d, sia; + __m64 s = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + d = load8888 (dest); sia = expand_alpha (s); sia = negate (sia); da = expand_alpha (d); s = pix_add_mul (s, da, d, sia); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -658,15 +939,15 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp, while (dest < end) { - __m64 s, dia, d, sa; + __m64 dia, d, sa; + __m64 s = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + d = load8888 (dest); sa = expand_alpha (s); dia = expand_alpha 
(d);
 dia = negate (dia);
 s = pix_add_mul (s, dia, d, sa);
- *dest = store8888 (s);
+ store8888 (dest, s);
 ++dest;
 ++src;
@@ -688,16 +969,16 @@ mmx_combine_xor_u (pixman_implementation_t *imp,
 while (dest < end)
 {
- __m64 s, dia, d, sia;
+ __m64 dia, d, sia;
+ __m64 s = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ d = load8888 (dest);
 sia = expand_alpha (s);
 dia = expand_alpha (d);
 sia = negate (sia);
 dia = negate (dia);
 s = pix_add_mul (s, dia, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
 ++dest;
 ++src;
@@ -719,12 +1000,12 @@ mmx_combine_add_u (pixman_implementation_t *imp,
 while (dest < end)
 {
- __m64 s, d;
+ __m64 d;
+ __m64 s = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ d = load8888 (dest);
 s = pix_add (s, d);
- *dest = store8888 (s);
+ store8888 (dest, s);
 ++dest;
 ++src;
@@ -746,22 +1027,25 @@ mmx_combine_saturate_u (pixman_implementation_t *imp,
 while (dest < end)
 {
- uint32_t s = combine (src, mask);
+ uint32_t s, sa, da;
 uint32_t d = *dest;
- __m64 ms = load8888 (s);
- __m64 md = load8888 (d);
- uint32_t sa = s >> 24;
- uint32_t da = ~d >> 24;
+ __m64 ms = combine (src, mask);
+ __m64 md = load8888 (dest);
+
+ store8888(&s, ms);
+ da = ~d >> 24;
+ sa = s >> 24;
 if (sa > da)
 {
- __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
+ uint32_t quot = DIV_UN8 (da, sa) << 24;
+ __m64 msa = load8888 (&quot);
 msa = expand_alpha (msa);
 ms = pix_multiply (ms, msa);
 }
 md = pix_add (md, ms);
- *dest = store8888 (md);
+ store8888 (dest, md);
 ++src;
 ++dest;
@@ -783,11 +1067,11 @@ mmx_combine_src_ca (pixman_implementation_t *imp,
 while (src < end)
 {
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
 s = pix_multiply (s, a);
- *dest = store8888 (s);
+ store8888 (dest, s);
 ++src;
 ++mask;
@@ -808,12 +1092,12 @@ mmx_combine_over_ca (pixman_implementation_t *imp,
 while (src < end)
 {
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
 __m64 sa = expand_alpha (s);
- *dest = store8888 (in_over (s, sa, a, d));
+ store8888 (dest, in_over (s, sa, a, d));
 ++src;
 ++dest;
@@ -834,12 +1118,12 @@ mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
 while (src < end)
 {
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
 __m64 da = expand_alpha (d);
- *dest = store8888 (over (d, da, in (s, a)));
+ store8888 (dest, over (d, da, in (s, a)));
 ++src;
 ++dest;
@@ -860,14 +1144,14 @@ mmx_combine_in_ca (pixman_implementation_t *imp,
 while (src < end)
 {
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
 __m64 da = expand_alpha (d);
 s = pix_multiply (s, a);
 s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
 ++src;
 ++dest;
@@ -888,14 +1172,14 @@ mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
 while (src < end)
 {
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
 __m64 sa = expand_alpha (s);
 a = pix_multiply (a, sa);
 d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
 ++src;
 ++dest;
@@ -916,15 +1200,15 @@ mmx_combine_out_ca
(pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); da = negate (da); s = pix_multiply (s, a); s = pix_multiply (s, da); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++dest; @@ -945,15 +1229,15 @@ mmx_combine_out_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); a = pix_multiply (a, sa); a = negate (a); d = pix_multiply (d, a); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -974,9 +1258,9 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -984,7 +1268,7 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1005,9 +1289,9 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1015,7 +1299,7 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); da = negate (da); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1036,9 +1320,9 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1047,7 +1331,7 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, da = negate (da); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1068,13 +1352,13 @@ mmx_combine_add_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); s = pix_multiply (s, a); d = pix_add (s, d); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1087,19 +1371,9 @@ mmx_combine_add_ca (pixman_implementation_t *imp, static void mmx_composite_over_n_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src; uint32_t *dst_line, *dst; int32_t w; @@ -1108,14 +1382,14 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, 
dest_image->bits.format); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1126,9 +1400,9 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); w--; dst++; @@ -1152,12 +1426,9 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w) + if (w) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); - - w--; - dst++; + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); } } @@ -1166,19 +1437,9 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, static void mmx_composite_over_n_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src; uint16_t *dst_line, *dst; int32_t w; @@ -1187,14 +1448,14 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1205,7 +1466,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1219,16 +1480,17 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, while (w >= 4) { - __m64 vdest; + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; - vdest = *(__m64 *)dst; + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3); + v0 = over (vsrc, vsrca, v0); + v1 = over (vsrc, vsrca, v1); + v2 = over (vsrc, vsrca, v2); + v3 = over (vsrc, vsrca, v3); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); dst += 4; w -= 4; @@ -1254,20 +1516,10 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, static void mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - uint32_t src, srca; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; uint32_t *dst_line; uint32_t *mask_line; int dst_stride, mask_stride; @@ -1275,16 +1527,15 @@ mmx_composite_over_n_8888_8888_ca 
(pixman_implementation_t *imp, CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1293,15 +1544,15 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, uint32_t *p = (uint32_t *)mask_line; uint32_t *q = (uint32_t *)dst_line; - while (twidth && (unsigned long)q & 7) + while (twidth && (uintptr_t)q & 7) { uint32_t m = *(uint32_t *)p; if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1320,9 +1571,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, __m64 dest0, dest1; __m64 vdest = *(__m64 *)q; - dest0 = in_over (vsrc, vsrca, load8888 (m0), + dest0 = in_over (vsrc, vsrca, load8888 (&m0), expand8888 (vdest, 0)); - dest1 = in_over (vsrc, vsrca, load8888 (m1), + dest1 = in_over (vsrc, vsrca, load8888 (&m1), expand8888 (vdest, 1)); *(__m64 *)q = pack8888 (dest0, dest1); @@ -1333,15 +1584,15 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, twidth -= 2; } - while (twidth) + if (twidth) { uint32_t m = *(uint32_t *)p; if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1358,37 +1609,23 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, static void mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; uint32_t mask; __m64 vmask; int dst_stride, src_stride; int32_t w; - __m64 srca; CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); - mask &= 0xff000000; - mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); - srca = MC (4x00ff); + mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); + vmask = expand_alpha (load8888 (&mask)); while (height--) { @@ -1398,12 +1635,12 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + store8888 (dst, in_over (s, 
expand_alpha (s), vmask, d)); w--; dst++; @@ -1412,7 +1649,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, while (w >= 2) { - __m64 vs = *(__m64 *)src; + __m64 vs = ldq_u ((__m64 *)src); __m64 vd = *(__m64 *)dst; __m64 vsrc0 = expand8888 (vs, 0); __m64 vsrc1 = expand8888 (vs, 1); @@ -1426,16 +1663,12 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, src += 2; } - while (w) + if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); - - w--; - dst++; - src++; + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); } } @@ -1444,19 +1677,9 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, static void mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; uint32_t mask; @@ -1467,13 +1690,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); + mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); - mask &= 0xff000000; - mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); + vmask = expand_alpha (load8888 (&mask)); srca = MC (4x00ff); while (height--) @@ -1484,12 +1705,13 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, srca, vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1507,14 +1729,14 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, __m64 vd6 = *(__m64 *)(dst + 12); __m64 vd7 = *(__m64 *)(dst + 14); - __m64 vs0 = *(__m64 *)(src + 0); - __m64 vs1 = *(__m64 *)(src + 2); - __m64 vs2 = *(__m64 *)(src + 4); - __m64 vs3 = *(__m64 *)(src + 6); - __m64 vs4 = *(__m64 *)(src + 8); - __m64 vs5 = *(__m64 *)(src + 10); - __m64 vs6 = *(__m64 *)(src + 12); - __m64 vs7 = *(__m64 *)(src + 14); + __m64 vs0 = ldq_u ((__m64 *)(src + 0)); + __m64 vs1 = ldq_u ((__m64 *)(src + 2)); + __m64 vs2 = ldq_u ((__m64 *)(src + 4)); + __m64 vs3 = ldq_u ((__m64 *)(src + 6)); + __m64 vs4 = ldq_u ((__m64 *)(src + 8)); + __m64 vs5 = ldq_u ((__m64 *)(src + 10)); + __m64 vs6 = ldq_u ((__m64 *)(src + 12)); + __m64 vs7 = ldq_u ((__m64 *)(src + 14)); vd0 = pack8888 ( in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), @@ -1564,10 +1786,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, while (w) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, srca, 
vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1580,19 +1803,9 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, static void mmx_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; uint32_t s; @@ -1602,7 +1815,7 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp, CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) @@ -1625,9 +1838,9 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp, else if (s) { __m64 ms, sa; - ms = load8888 (s); + ms = load8888 (&s); sa = expand_alpha (ms); - *dst = store8888 (over (ms, sa, load8888 (*dst))); + store8888 (dst, over (ms, sa, load8888 (dst))); } dst++; @@ -1638,19 +1851,9 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp, static void mmx_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; @@ -1658,7 +1861,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 @@ -1676,9 +1879,9 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1696,22 +1899,23 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w >= 4) { + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; __m64 vsrc0, vsrc1, vsrc2, vsrc3; - __m64 vdest; - vsrc0 = load8888 (*(src + 0)); - vsrc1 = load8888 (*(src + 1)); - vsrc2 = load8888 (*(src + 2)); - vsrc3 = load8888 (*(src + 3)); + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - vdest = *(__m64 *)dst; + vsrc0 = load8888 ((src + 0)); + vsrc1 = load8888 ((src + 1)); + vsrc2 = load8888 ((src + 2)); + vsrc3 = load8888 ((src + 3)); - vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3); + v0 = over (vsrc0, expand_alpha (vsrc0), v0); + v1 = over (vsrc1, expand_alpha (vsrc1), v1); + v2 = over (vsrc2, expand_alpha (vsrc2), v2); + v3 = over (vsrc3, expand_alpha 
(vsrc3), v3); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); w -= 4; dst += 4; @@ -1722,7 +1926,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1741,19 +1945,9 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, static void mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst_line, *dst; uint8_t *mask_line, *mask; @@ -1764,7 +1958,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) @@ -1772,10 +1966,10 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, srcsrc = (uint64_t)src << 32 | src; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1788,7 +1982,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -1796,9 +1990,9 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)), - load8888 (*dst)); + load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } w--; @@ -1841,44 +2035,41 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w) + if (w) { uint64_t m = *mask; if (m) { - __m64 vdest = load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in_over ( vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest); - *dst = store8888 (vdest); + store8888 (dst, vdest); } - - w--; - mask++; - dst++; } } _mm_empty (); } -pixman_bool_t -pixman_fill_mmx (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) +static pixman_bool_t +mmx_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) { uint64_t fill; __m64 vfill; uint32_t byte_width; uint8_t *byte_line; -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __m64 v1, v2, v3, v4, v5, v6, v7; #endif @@ -1891,7 +2082,7 @@ pixman_fill_mmx (uint32_t *bits, byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); byte_width = width; stride *= 1; - xor = (xor & 0xff) * 0x01010101; + filler = (filler & 0xff) * 0x01010101; } else if (bpp == 16) { @@ -1899,7 +2090,7 @@ pixman_fill_mmx (uint32_t *bits, byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); byte_width = 2 * width; stride *= 2; - xor = (xor & 0xffff) * 0x00010001; + filler = (filler & 0xffff) * 0x00010001; } else { @@ -1909,10 +2100,10 @@ pixman_fill_mmx (uint32_t *bits, stride *= 4; } - fill = ((uint64_t)xor << 32) | xor; + fill = 
((uint64_t)filler << 32) | filler; vfill = to_m64 (fill); -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %7, %0\n" "movq %7, %1\n" @@ -1934,23 +2125,23 @@ pixman_fill_mmx (uint32_t *bits, byte_line += stride; w = byte_width; - while (w >= 1 && ((unsigned long)d & 1)) + if (w >= 1 && ((uintptr_t)d & 1)) { - *(uint8_t *)d = (xor & 0xff); + *(uint8_t *)d = (filler & 0xff); w--; d++; } - while (w >= 2 && ((unsigned long)d & 3)) + if (w >= 2 && ((uintptr_t)d & 3)) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } - while (w >= 4 && ((unsigned long)d & 7)) + while (w >= 4 && ((uintptr_t)d & 7)) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; @@ -1958,7 +2149,7 @@ pixman_fill_mmx (uint32_t *bits, while (w >= 64) { -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %1, (%0)\n" "movq %2, 8(%0)\n" @@ -1989,20 +2180,20 @@ pixman_fill_mmx (uint32_t *bits, while (w >= 4) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; } - while (w >= 2) + if (w >= 2) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } - while (w >= 1) + if (w >= 1) { - *(uint8_t *)d = (xor & 0xff); + *(uint8_t *)d = (filler & 0xff); w--; d++; } @@ -2014,48 +2205,93 @@ pixman_fill_mmx (uint32_t *bits, } static void -mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +mmx_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + + while (w >= 4) + { + __m64 vdest; + __m64 vsrc0 = ldq_u ((__m64 *)(src + 0)); + __m64 vsrc1 = ldq_u ((__m64 *)(src + 2)); + + vdest = pack_4xpacked565 (vsrc0, vsrc1); + + *(__m64 *)dst = vdest; + + w -= 4; + src += 4; + dst += 4; + } + + while (w) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + } + + _mm_empty (); +} + +static void +mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; - __m64 vsrc, vsrca; + __m64 vsrc; uint64_t srcsrc; CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) { - pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dst_image->bits.format), - dest_x, dest_y, width, height, 0); + mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, width, height, 0); return; } srcsrc = (uint64_t)src << 32 | src; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, 
dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); - vsrca = expand_alpha (vsrc); + vsrc = load8888 (&src); while (height--) { @@ -2067,7 +2303,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -2075,7 +2311,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { @@ -2101,11 +2337,8 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, } else if (m0 | m1) { - __m64 vdest; __m64 dest0, dest1; - vdest = *(__m64 *)dst; - dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0))); dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1))); @@ -2123,25 +2356,21 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w) + if (w) { uint64_t m = *mask; if (m) { - __m64 vdest = load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { *dst = 0; } - - w--; - mask++; - dst++; } } @@ -2150,47 +2379,33 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, static void mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint16_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; __m64 vsrc, vsrca, tmp; - uint64_t srcsrcsrcsrc, src16; + __m64 srcsrcsrcsrc; CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); - src16 = to_uint64 (tmp); - - srcsrcsrcsrc = - (uint64_t)src16 << 48 | (uint64_t)src16 << 32 | - (uint64_t)src16 << 16 | (uint64_t)src16; + srcsrcsrcsrc = expand_alpha_rev (tmp); while (height--) { @@ -2202,7 +2417,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -2234,29 +2449,29 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) { - *(uint64_t *)dst = srcsrcsrcsrc; + *(__m64 *)dst = srcsrcsrcsrc; } else if (m0 | m1 | m2 | m3) { - __m64 vdest; + __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; __m64 vm0, vm1, vm2, vm3; - vdest = *(__m64 *)dst; + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); vm0 = to_m64 (m0); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0), - expand565 (vdest, 0)), vdest, 0); - vm1 = to_m64 (m1); - vdest = pack_565 (in_over (vsrc, vsrca, 
expand_alpha_rev (vm1), - expand565 (vdest, 1)), vdest, 1); - vm2 = to_m64 (m2); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2), - expand565 (vdest, 2)), vdest, 2); - vm3 = to_m64 (m3); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3), - expand565 (vdest, 3)), vdest, 3); + v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0); - *(__m64 *)dst = vdest; + vm1 = to_m64 (m1); + v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1); + + vm2 = to_m64 (m2); + v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2); + + vm3 = to_m64 (m3); + v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); } w -= 4; @@ -2291,19 +2506,9 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, static void mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; @@ -2311,7 +2516,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 @@ -2329,9 +2534,9 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2363,24 +2568,31 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, if ((a0 & a1 & a2 & a3) == 0xFF) { - __m64 vdest; - vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0); - vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1); - vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2); - vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3); + __m64 v0 = invert_colors (load8888 (&s0)); + __m64 v1 = invert_colors (load8888 (&s1)); + __m64 v2 = invert_colors (load8888 (&s2)); + __m64 v3 = invert_colors (load8888 (&s3)); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); } else if (s0 | s1 | s2 | s3) { __m64 vdest = *(__m64 *)dst; + __m64 v0, v1, v2, v3; - vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3); + __m64 vsrc0 = load8888 (&s0); + __m64 vsrc1 = load8888 (&s1); + __m64 vsrc2 = load8888 (&s2); + __m64 vsrc3 = load8888 (&s3); - *(__m64 *)dst = vdest; + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); + + v0 = over_rev_non_pre (vsrc0, v0); + v1 = over_rev_non_pre (vsrc1, v1); + v2 = over_rev_non_pre (vsrc2, v2); + v3 = over_rev_non_pre (vsrc3, v3); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); } w -= 4; @@ -2392,7 +2604,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888
(src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2411,19 +2623,9 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, static void mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; @@ -2431,7 +2633,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 @@ -2447,12 +2649,12 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); + store8888 (dst, over_rev_non_pre (s, d)); w--; dst++; @@ -2461,7 +2663,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, while (w >= 2) { - uint64_t s0, s1; + uint32_t s0, s1; unsigned char a0, a1; __m64 d0, d1; @@ -2473,8 +2675,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, if ((a0 & a1) == 0xFF) { - d0 = invert_colors (load8888 (s0)); - d1 = invert_colors (load8888 (s1)); + d0 = invert_colors (load8888 (&s0)); + d1 = invert_colors (load8888 (&s1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2482,8 +2684,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0)); - d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1)); + d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2493,16 +2695,12 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, src += 2; } - while (w) + if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); - - w--; - dst++; - src++; + store8888 (dst, over_rev_non_pre (s, d)); } } @@ -2511,20 +2709,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, static void mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - uint32_t src, srca; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; uint16_t *dst_line; uint32_t *mask_line; int dst_stride, mask_stride; @@ -2532,16 +2720,15 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, CHECKPOINT (); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, 
dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2550,7 +2737,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, uint32_t *p = (uint32_t *)mask_line; uint16_t *q = (uint16_t *)dst_line; - while (twidth && ((unsigned long)q & 7)) + while (twidth && ((uintptr_t)q & 7)) { uint32_t m = *(uint32_t *)p; @@ -2558,7 +2745,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2579,13 +2766,16 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, if ((m0 | m1 | m2 | m3)) { __m64 vdest = *(__m64 *)q; + __m64 v0, v1, v2, v3; - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3); + expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0); - *(__m64 *)q = vdest; + v0 = in_over (vsrc, vsrca, load8888 (&m0), v0); + v1 = in_over (vsrc, vsrca, load8888 (&m1), v1); + v2 = in_over (vsrc, vsrca, load8888 (&m2), v2); + v3 = in_over (vsrc, vsrca, load8888 (&m3), v3); + + *(__m64 *)q = pack_4x565 (v0, v1, v2, v3); } twidth -= 4; p += 4; @@ -2601,7 +2791,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2619,19 +2809,9 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, static void mmx_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; @@ -2640,14 +2820,14 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, uint8_t sa; __m64 vsrc, vsrca; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = src >> 24; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2658,26 +2838,35 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)dst_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (uintptr_t)dst & 7) { - while (w >= 
4) - { - uint32_t m; - __m64 vmask; - __m64 vdest; + uint16_t tmp; + uint8_t a; + uint32_t m, d; - m = 0; + a = *mask++; + d = *dst; - vmask = load8888 (*(uint32_t *)mask); - vdest = load8888 (*(uint32_t *)dst); + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); - *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + *dst++ = d; + w--; + } - dst += 4; - mask += 4; - w -= 4; - } + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); + + store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2701,25 +2890,15 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, static void mmx_composite_in_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int src_stride, dst_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); while (height--) @@ -2730,20 +2909,31 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - if ((((unsigned long)dst_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (uintptr_t)dst & 3) { - while (w >= 4) - { - uint32_t *s = (uint32_t *)src; - uint32_t *d = (uint32_t *)dst; + uint8_t s, d; + uint16_t tmp; - *d = store8888 (in (load8888 (*s), load8888 (*d))); + s = *src; + d = *dst; - w -= 4; - dst += 4; - src += 4; - } + *dst = MUL_UN8 (s, d, tmp); + + src++; + dst++; + w--; + } + + while (w >= 4) + { + uint32_t *s = (uint32_t *)src; + uint32_t *d = (uint32_t *)dst; + + store8888 (d, in (load8888u (s), load8888 (d))); + + w -= 4; + dst += 4; + src += 4; } while (w--) @@ -2766,19 +2956,9 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, static void mmx_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; @@ -2787,17 +2967,17 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, uint8_t sa; __m64 vsrc, vsrca; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = src >> 24; if (src == 0) return; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2808,20 +2988,36 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)mask_image & 3) == 0) && - 
(((unsigned long)dst_image & 3) == 0)) + while (w && (uintptr_t)dst & 3) { - while (w >= 4) - { - __m64 vmask = load8888 (*(uint32_t *)mask); - __m64 vdest = load8888 (*(uint32_t *)dst); + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; - *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + a = *mask++; + d = *dst; - w -= 4; - dst += 4; - mask += 4; - } + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); + + store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2846,19 +3042,9 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, static void mmx_composite_add_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; @@ -2869,7 +3055,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, CHECKPOINT (); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { @@ -2879,7 +3065,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { s = *src; d = *dst; @@ -2894,7 +3080,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, while (w >= 8) { - *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + *(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); dst += 8; src += 8; w -= 8; @@ -2918,30 +3104,21 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, } static void -mmx_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +mmx_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - __m64 dst64; - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint16_t *src_line, *src; + uint32_t s; int dst_stride, src_stride; int32_t w; CHECKPOINT (); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { @@ -2951,10 +3128,92 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { - *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src), - _mm_cvtsi32_si64 (*dst))); + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if 
(d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + w--; + } + + while (w >= 4) + { + __m64 vdest = *(__m64 *)dst; + __m64 vsrc = ldq_u ((__m64 *)src); + __m64 vd0, vd1; + __m64 vs0, vs1; + + expand_4xpacked565 (vdest, &vd0, &vd1, 0); + expand_4xpacked565 (vsrc, &vs0, &vs1, 0); + + vd0 = _mm_adds_pu8 (vd0, vs0); + vd1 = _mm_adds_pu8 (vd1, vs1); + + *(__m64 *)dst = pack_4xpacked565 (vd0, vd1); + + dst += 4; + src += 4; + w -= 4; + } + + while (w--) + { + s = *src++; + if (s) + { + d = *dst; + s = convert_0565_to_8888 (s); + if (d) + { + d = convert_0565_to_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = convert_8888_to_0565 (s); + } + dst++; + } + } + + _mm_empty (); +} + +static void +mmx_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 7) + { + store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), + load ((const uint32_t *)dst))); dst++; src++; w--; @@ -2962,8 +3221,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, while (w >= 2) { - dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); - *(uint64_t*)dst = to_uint64 (dst64); + *(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst); dst += 2; src += 2; w -= 2; @@ -2971,8 +3229,8 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, if (w) { - *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src), - _mm_cvtsi32_si64 (*dst))); + store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), + load ((const uint32_t *)dst))); } } @@ -2981,18 +3239,19 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, } static pixman_bool_t -pixman_blt_mmx (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) +mmx_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) { uint8_t * src_bytes; uint8_t * dst_bytes; @@ -3006,7 +3265,7 @@ pixman_blt_mmx (uint32_t *src_bits, src_stride = src_stride * (int) sizeof (uint32_t) / 2; dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); byte_width = 2 * width; src_stride *= 2; dst_stride *= 2; @@ -3016,7 +3275,7 @@ pixman_blt_mmx (uint32_t *src_bits, src_stride = src_stride * (int) sizeof (uint32_t) / 4; dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); byte_width = 4 * width; src_stride *= 
4; dst_stride *= 4; @@ -3035,7 +3294,15 @@ pixman_blt_mmx (uint32_t *src_bits, dst_bytes += dst_stride; w = byte_width; - while (w >= 2 && ((unsigned long)d & 3)) + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = *(uint8_t *)s; + w -= 1; + s += 1; + d += 1; + } + + if (w >= 2 && ((uintptr_t)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; w -= 2; @@ -3043,9 +3310,9 @@ pixman_blt_mmx (uint32_t *src_bits, d += 2; } - while (w >= 4 && ((unsigned long)d & 7)) + while (w >= 4 && ((uintptr_t)d & 7)) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u ((uint32_t *)s); w -= 4; s += 4; @@ -3054,7 +3321,7 @@ pixman_blt_mmx (uint32_t *src_bits, while (w >= 64) { -#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX __asm__ ( "movq (%1), %%mm0\n" "movq 8(%1), %%mm1\n" @@ -3079,14 +3346,14 @@ pixman_blt_mmx (uint32_t *src_bits, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); #else - __m64 v0 = *(__m64 *)(s + 0); - __m64 v1 = *(__m64 *)(s + 8); - __m64 v2 = *(__m64 *)(s + 16); - __m64 v3 = *(__m64 *)(s + 24); - __m64 v4 = *(__m64 *)(s + 32); - __m64 v5 = *(__m64 *)(s + 40); - __m64 v6 = *(__m64 *)(s + 48); - __m64 v7 = *(__m64 *)(s + 56); + __m64 v0 = ldq_u ((__m64 *)(s + 0)); + __m64 v1 = ldq_u ((__m64 *)(s + 8)); + __m64 v2 = ldq_u ((__m64 *)(s + 16)); + __m64 v3 = ldq_u ((__m64 *)(s + 24)); + __m64 v4 = ldq_u ((__m64 *)(s + 32)); + __m64 v5 = ldq_u ((__m64 *)(s + 40)); + __m64 v6 = ldq_u ((__m64 *)(s + 48)); + __m64 v7 = ldq_u ((__m64 *)(s + 56)); *(__m64 *)(d + 0) = v0; *(__m64 *)(d + 8) = v1; *(__m64 *)(d + 16) = v2; @@ -3103,7 +3370,7 @@ pixman_blt_mmx (uint32_t *src_bits, } while (w >= 4) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u ((uint32_t *)s); w -= 4; s += 4; @@ -3125,51 +3392,31 @@ pixman_blt_mmx (uint32_t *src_bits, static void mmx_composite_copy_area (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - pixman_blt_mmx (src_image->bits.bits, - dst_image->bits.bits, - src_image->bits.rowstride, - dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP (src_image->bits.format), - PIXMAN_FORMAT_BPP (dst_image->bits.format), - src_x, src_y, dest_x, dest_y, width, height); + PIXMAN_COMPOSITE_ARGS (info); + + mmx_blt (imp, src_image->bits.bits, + dest_image->bits.bits, + src_image->bits.rowstride, + dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dest_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); } -#if 0 static void mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *src, *src_line; uint32_t *dst, *dst_line; uint8_t *mask, *mask_line; int src_stride, mask_stride, dst_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE 
(mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); @@ -3190,19 +3437,20 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 s = load8888 (*src | 0xff000000); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); if (m == 0xff) { - *dst = store8888 (s); + store8888 (dst, s); } else { __m64 sa = expand_alpha (s); __m64 vm = expand_alpha_rev (to_m64 (m)); - __m64 vdest = in_over (s, sa, vm, load8888 (*dst)); + __m64 vdest = in_over (s, sa, vm, load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } } @@ -3214,7 +3462,489 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, _mm_empty (); } -#endif + +static void +mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst; + int32_t w; + int dst_stride; + __m64 vsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + vsrc = load8888 (&src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + CHECKPOINT (); + + while (w && (uintptr_t)dst & 7) + { + __m64 vdest = load8888 (dst); + + store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); + + w--; + dst++; + } + + while (w >= 2) + { + __m64 vdest = *(__m64 *)dst; + __m64 dest0 = expand8888 (vdest, 0); + __m64 dest1 = expand8888 (vdest, 1); + + + dest0 = over (dest0, expand_alpha (dest0), vsrc); + dest1 = over (dest1, expand_alpha (dest1), vsrc); + + *(__m64 *)dst = pack8888 (dest0, dest1); + + dst += 2; + w -= 2; + } + + CHECKPOINT (); + + if (w) + { + __m64 vdest = load8888 (dst); + + store8888 (dst, over (vdest, expand_alpha (vdest), vsrc)); + } + } + + _mm_empty (); +} + +#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS)) +#define BMSK (BSHIFT - 1) + +#define BILINEAR_DECLARE_VARIABLES \ + const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ + const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ + const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \ + const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ + const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ + const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ + const __m64 mm_zero = _mm_setzero_si64 (); \ + __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +do { \ + /* fetch 2x2 pixel block into 2 mmx registers */ \ + __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \ + __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \ + /* vertical interpolation */ \ + __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \ + __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \ + __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \ + __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ + __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ + __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ + vx += unit_x; \ + if (BILINEAR_INTERPOLATION_BITS < 8) \ + { \ + /* calculate horizontal weights */ \ + __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ + _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS))); \ + /* horizontal interpolation */ \ + __m64 p = _mm_unpacklo_pi16 (lo, hi); \ + __m64 q = 
_mm_unpackhi_pi16 (lo, hi); \ + lo = _mm_madd_pi16 (p, mm_wh); \ + hi = _mm_madd_pi16 (q, mm_wh); \ + } \ + else \ + { \ + /* calculate horizontal weights */ \ + __m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + __m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS); \ + /* horizontal interpolation */ \ + __m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \ + __m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \ + __m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \ + __m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \ + lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \ + _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \ + hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \ + _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \ + } \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ + /* shift and pack the result */ \ + hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_packs_pi32 (lo, hi); \ + lo = _mm_packs_pu16 (lo, lo); \ + pix = lo; \ +} while (0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ +} while(0)
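For reference, the arithmetic that BILINEAR_INTERPOLATE_ONE_PIXEL performs on each channel is ordinary two-pass fixed-point bilinear filtering. The scalar sketch below is illustrative only and not part of the patch: tl/tr/bl/br stand for the four neighboring channel values, each weight pair sums to 1 << BILINEAR_INTERPOLATION_BITS, and the final shift drops the doubled precision, exactly as the macro's comments describe.

#include <stdint.h>

#define BILINEAR_INTERPOLATION_BITS 7	/* same value pixman-private.h defines below */

static inline uint8_t
bilinear_channel (uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br,
                  int32_t wt, int32_t wb, int32_t wl, int32_t wr)
{
    /* vertical pass: blend each column's top and bottom neighbors */
    int32_t left  = tl * wt + bl * wb;
    int32_t right = tr * wt + br * wb;

    /* horizontal pass, then shift out 2 * BITS of weight precision */
    return (uint8_t) ((left * wl + right * wr) >>
                      (2 * BILINEAR_INTERPOLATION_BITS));
}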
+ +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix; + + while (w--) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix); + store (dst, pix); + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (!is_zero (pix1)) + { + pix2 = load (dst); + store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2)); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + uint32_t m; + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (m == 0xff && is_opaque (pix1)) + { + store (dst, pix1); + } + else + { + __m64 ms, md, ma, msa; + + pix2 = load (dst); + ma = expand_alpha_rev (to_m64 (m)); + ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); + md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); + + msa = expand_alpha (ms); + + store8888 (dst, (in_over (ms, msa, ma, md))); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + +static uint32_t * +mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 7) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 8) + { + __m64 vsrc1 = ldq_u ((__m64 *)(src + 0)); + __m64 vsrc2 = ldq_u ((__m64 *)(src + 2)); + __m64 vsrc3 = ldq_u ((__m64 *)(src + 4)); + __m64 vsrc4 = ldq_u ((__m64 *)(src + 6)); + + *(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000)); + *(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000)); + *(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000)); + *(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000)); + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + _mm_empty (); + return iter->buffer; +} + +static uint32_t * +mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint16_t *src = (uint16_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + while (w >= 4) + { + __m64 vsrc = ldq_u ((__m64 *)src); + __m64 mm0, mm1; + + expand_4xpacked565 (vsrc, &mm0, &mm1, 1); + + *(__m64 *)(dst + 0) = mm0; + *(__m64 *)(dst + 2) = mm1; + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + _mm_empty (); + return iter->buffer; +} + +static uint32_t * +mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint8_t *src = iter->bits; + + iter->bits += iter->stride; + + while (w && (((uintptr_t)dst) & 15)) + { + *dst++ = *(src++) << 24; + w--; + } + + while (w >= 8) + { + __m64 mm0 = ldq_u ((__m64 *)src); + + __m64 mm1 = _mm_unpacklo_pi8
(_mm_setzero_si64(), mm0); + __m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0); + __m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1); + __m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1); + __m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2); + __m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2); + + *(__m64 *)(dst + 0) = mm3; + *(__m64 *)(dst + 2) = mm4; + *(__m64 *)(dst + 4) = mm5; + *(__m64 *)(dst + 6) = mm6; + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + *dst++ = *(src++) << 24; + w--; + } + + _mm_empty (); + return iter->buffer; +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, + { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, + { PIXMAN_a8, mmx_fetch_a8 }, + { PIXMAN_null } +}; + +static pixman_bool_t +mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} static const pixman_fast_path_t mmx_fast_paths[] = { @@ -3244,18 +3974,14 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ), -#if 0 - /* FIXME: This code is commented out since it's apparently - * not actually faster than the generic code. 
- */ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ), PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ), - PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ), -#endif + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ), PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ), PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ), PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ), + PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ), PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ), PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ), @@ -3266,11 +3992,20 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ), PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ), PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ), @@ -3287,63 +4022,30 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, 
x8b8g8r8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ), + { PIXMAN_OP_NONE }, }; -static pixman_bool_t -mmx_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - if (!pixman_blt_mmx ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) - - { - return _pixman_implementation_blt ( - imp->delegate, - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); - } - - return TRUE; -} - -static pixman_bool_t -mmx_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor)) - { - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); - } - - return TRUE; -} - pixman_implementation_t * -_pixman_implementation_create_mmx (void) +_pixman_implementation_create_mmx (pixman_implementation_t *fallback) { - pixman_implementation_t *general = _pixman_implementation_create_fast_path (); - pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths); + pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths); imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u; imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u; @@ -3372,7 +4074,9 @@ _pixman_implementation_create_mmx (void) imp->blt = mmx_blt; imp->fill = mmx_fill; + imp->src_iter_init = mmx_src_iter_init; + return imp; } -#endif /* USE_MMX */ +#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */ diff --git a/programs/develop/libraries/pixman/pixman-noop.c b/programs/develop/libraries/pixman/pixman-noop.c new file mode 100644 index 0000000000..e39996d9df --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-noop.c @@ -0,0 +1,176 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2011 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <string.h> +#include <stdlib.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static void +noop_composite (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + return; +} + +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static uint32_t * +noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) +{ + uint32_t *result = iter->buffer; + + iter->buffer += iter->image->bits.rowstride; + + return result; +} + +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) +{ + return NULL; +} + +static pixman_bool_t +noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) + + if (!image) + { + iter->get_scanline = get_scanline_null; + } + else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == + (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) + { + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else if (image->common.extended_format_code == PIXMAN_solid && + (iter->image->type == SOLID || + (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) + { + if (iter->iter_flags & ITER_NARROW) + { + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; + } + else + { + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; + + if (image->type == SOLID) + color = image->solid.color_float; + else + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; + } + + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 && + (iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS && + iter->x >= 0 && iter->y >= 0 && + iter->x + iter->width <= image->bits.width && + iter->y + iter->height <= image->bits.height) + { + iter->buffer = + image->bits.bits + iter->y * image->bits.rowstride + iter->x; + + iter->get_scanline = noop_get_scanline; + } + else + { + return FALSE; + } + + return TRUE; +} + +static pixman_bool_t +noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + uint32_t image_flags = iter->image_flags; + uint32_t iter_flags = iter->iter_flags; + + if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS && + (iter_flags & ITER_NARROW) == ITER_NARROW && + ((image->common.extended_format_code == PIXMAN_a8r8g8b8) || + (image->common.extended_format_code == PIXMAN_x8r8g8b8 && + (iter_flags & (ITER_LOCALIZED_ALPHA))))) + { + iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->write_back = dest_write_back_direct; + + return TRUE; + } + else + { + return FALSE; + } +} + +static const pixman_fast_path_t noop_fast_paths[] = +{ + { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, + { PIXMAN_OP_NONE }, +};
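For orientation, the iterators set up by noop_src_iter_init / noop_dest_iter_init above are consumed by pixman's general compositing loop roughly as sketched below. This is a simplified, hypothetical driver, not code from this patch (the real loop in pixman-general.c also handles a mask iterator, combiner lookup and the wide float pipeline); it shows why handing out direct pointers into the image bits, as noop_get_scanline and dest_write_back_direct do, saves a per-scanline copy.

/* Hypothetical per-row driver for a mask-less operation. */
static void
composite_rows (pixman_implementation_t *imp,
                pixman_op_t              op,
                pixman_combine_32_func_t combine,
                pixman_iter_t *          src_iter,
                pixman_iter_t *          dest_iter,
                int                      width,
                int                      height)
{
    int i;

    for (i = 0; i < height; ++i)
    {
        /* with the noop iterators these are pointers straight into
         * the source/destination bits, not temporary buffers */
        uint32_t *s = src_iter->get_scanline (src_iter, NULL);
        uint32_t *d = dest_iter->get_scanline (dest_iter, NULL);

        combine (imp, op, d, s, NULL, width);

        dest_iter->write_back (dest_iter);
    }
}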
+ +pixman_implementation_t * +_pixman_implementation_create_noop (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, noop_fast_paths); + imp->src_iter_init = noop_src_iter_init; + imp->dest_iter_init = noop_dest_iter_init; + + return imp; +} diff --git a/programs/develop/libraries/pixman/pixman-private.h b/programs/develop/libraries/pixman/pixman-private.h index e7eaca7649..6d9c05321d 100644 --- a/programs/develop/libraries/pixman/pixman-private.h +++ b/programs/develop/libraries/pixman/pixman-private.h @@ -1,7 +1,26 @@ +#include #ifndef PIXMAN_PRIVATE_H #define PIXMAN_PRIVATE_H +/* + * The defines which are shared between C and assembly code + */ + +/* bilinear interpolation precision (must be <= 8) */ +#define BILINEAR_INTERPOLATION_BITS 7 +#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) + +/* + * C specific part + */ + +#ifndef __ASSEMBLER__ + +#ifndef PACKAGE +# error config.h must be included before pixman-private.h +#endif + #define PIXMAN_DISABLE_DEPRECATED #define PIXMAN_USE_INTERNAL_API @@ -10,6 +29,7 @@ #include #include #include +#include #include "pixman-compiler.h" @@ -17,7 +37,6 @@ * Images */ typedef struct image_common image_common_t; -typedef struct source_image source_image_t; typedef struct solid_fill solid_fill_t; typedef struct gradient gradient_t; typedef struct linear_gradient linear_gradient_t; @@ -28,6 +47,16 @@ typedef struct radial_gradient radial_gradient_t; typedef struct bits_image bits_image_t; typedef struct circle circle_t; +typedef struct argb_t argb_t; + +struct argb_t +{ + float a; + float r; + float g; + float b; +}; + typedef void (*fetch_scanline_t) (pixman_image_t *image, int x, int y, @@ -39,9 +68,9 @@ typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image, int x, int y); -typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image, - int x, - int y); +typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image, + int x, + int y); typedef void (*store_scanline_t) (bits_image_t * image, int x, @@ -58,17 +87,6 @@ typedef enum SOLID } image_type_t; -typedef enum -{ - SOURCE_IMAGE_CLASS_UNKNOWN, - SOURCE_IMAGE_CLASS_HORIZONTAL, -} source_image_class_t; - -typedef source_image_class_t (*classify_func_t) (pixman_image_t *image, - int x, - int y, - int width, - int height); typedef void (*property_changed_func_t) (pixman_image_t *image); struct image_common @@ -93,10 +111,7 @@ struct image_common int alpha_origin_x; int alpha_origin_y; pixman_bool_t component_alpha; - classify_func_t classify; property_changed_func_t property_changed; - fetch_scanline_t get_scanline_32; - fetch_scanline_t get_scanline_64; pixman_image_destroy_func_t destroy_func; void * destroy_data; @@ -105,26 +120,20 @@ struct image_common pixman_format_code_t extended_format_code; }; -struct source_image -{ - image_common_t common; -}; - struct solid_fill { - source_image_t common; + image_common_t common; pixman_color_t color; uint32_t color_32; - uint64_t color_64; + argb_t color_float; }; struct gradient { - source_image_t common; + image_common_t common; int n_stops; pixman_gradient_stop_t *stops; - int stop_range; }; struct linear_gradient @@ -176,9 +185,9 @@ struct bits_image fetch_pixel_32_t fetch_pixel_32; store_scanline_t store_scanline_32; - fetch_scanline_t fetch_scanline_64; - fetch_pixel_64_t fetch_pixel_64; - store_scanline_t store_scanline_64; + fetch_scanline_t fetch_scanline_float; + fetch_pixel_float_t fetch_pixel_float; + store_scanline_t store_scanline_float; /* Used for indirect access to the bits */ pixman_read_memory_func_t read_func; @@ -190,7 +199,6 @@ union pixman_image image_type_t type; image_common_t common; bits_image_t bits; - source_image_t source; gradient_t gradient; linear_gradient_t linear; conical_gradient_t conical; @@ -198,59 +206,86 @@ union pixman_image solid_fill_t solid; }; +typedef struct pixman_iter_t pixman_iter_t; +typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); +typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); + +typedef enum +{ + ITER_NARROW = (1 << 0), + + /* "Localized alpha" is when the alpha channel is used only to compute + * the alpha value of the destination. This means that the computation + * of the RGB values of the result is independent of the alpha value. + * + * For example, the OVER operator has localized alpha for the + * destination, because the RGB values of the result can be computed + * without knowing the destination alpha. Similarly, ADD has localized + * alpha for both source and destination because the RGB values of the + * result can be computed without knowing the alpha value of source or + * destination. + * + * When the destination is xRGB, this is useful knowledge, because then + * we can treat it as if it were ARGB, which means in some cases we can + * avoid copying it to a temporary buffer. + */ + ITER_LOCALIZED_ALPHA = (1 << 1), + ITER_IGNORE_ALPHA = (1 << 2), + ITER_IGNORE_RGB = (1 << 3) +} iter_flags_t;
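To make the localized-alpha remark above concrete: with premultiplied pixels, OVER computes every destination color channel as src + (1 - srca) * dest, so the destination's own alpha is never read. A one-channel illustration (a sketch, not part of the patch):

/* OVER on a single premultiplied 8-bit channel. Only the source
 * alpha appears in the formula; dest's alpha byte is irrelevant,
 * which is why an x8r8g8b8 destination can be handed to an
 * a8r8g8b8 code path when ITER_LOCALIZED_ALPHA is set. */
static inline uint8_t
over_channel (uint8_t src, uint8_t srca, uint8_t dest)
{
    return src + (uint8_t) ((dest * (255 - srca) + 127) / 255);
}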
+ +struct pixman_iter_t +{ + /* These are initialized by _pixman_implementation_{src,dest}_init */ + pixman_image_t * image; + uint32_t * buffer; + int x, y; + int width; + int height; + iter_flags_t iter_flags; + uint32_t image_flags; + + /* These function pointers are initialized by the implementation */ + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; + + /* These fields are scratch data that implementations can use */ + void * data; + uint8_t * bits; + int stride; +}; + void _pixman_bits_image_setup_accessors (bits_image_t *image); void -_pixman_image_get_scanline_generic_64 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask); - -source_image_class_t -_pixman_image_classify (pixman_image_t *image, - int x, - int y, - int width, - int height); +_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter); void -_pixman_image_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask); - -/* Even thought the type of buffer is uint32_t *, the function actually expects - * a uint64_t *buffer. - */ -void -_pixman_image_get_scanline_64 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *unused); +_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter); void -_pixman_image_store_scanline_32 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *buffer); +_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); -/* Even though the type of buffer is uint32_t *, the function - * actually expects a uint64_t *buffer.
- */ void -_pixman_image_store_scanline_64 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *buffer); +_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); + +void +_pixman_image_init (pixman_image_t *image); + +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride, + pixman_bool_t clear); +pixman_bool_t +_pixman_image_fini (pixman_image_t *image); pixman_image_t * _pixman_image_allocate (void); @@ -265,10 +300,6 @@ _pixman_image_reset_clip_region (pixman_image_t *image); void _pixman_image_validate (pixman_image_t *image); -uint32_t -_pixman_image_get_solid (pixman_image_t * image, - pixman_format_code_t format); - #define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \ do \ { \ @@ -288,33 +319,32 @@ _pixman_image_get_solid (pixman_image_t * image, */ typedef struct { - uint32_t left_ag; - uint32_t left_rb; - uint32_t right_ag; - uint32_t right_rb; - int32_t left_x; - int32_t right_x; - int32_t stepper; + float a_s, a_b; + float r_s, r_b; + float g_s, g_b; + float b_s, b_b; + pixman_fixed_t left_x; + pixman_fixed_t right_x; pixman_gradient_stop_t *stops; int num_stops; - unsigned int spread; + pixman_repeat_t repeat; - int need_reset; + pixman_bool_t need_reset; } pixman_gradient_walker_t; void _pixman_gradient_walker_init (pixman_gradient_walker_t *walker, gradient_t * gradient, - unsigned int spread); + pixman_repeat_t repeat); void _pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t pos); + pixman_fixed_48_16_t pos); uint32_t _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t x); + pixman_fixed_48_16_t x); /* * Edges @@ -352,6 +382,40 @@ pixman_rasterize_edges_accessors (pixman_image_t *image, */ typedef struct pixman_implementation_t pixman_implementation_t; +typedef struct +{ + pixman_op_t op; + pixman_image_t * src_image; + pixman_image_t * mask_image; + pixman_image_t * dest_image; + int32_t src_x; + int32_t src_y; + int32_t mask_x; + int32_t mask_y; + int32_t dest_x; + int32_t dest_y; + int32_t width; + int32_t height; + + uint32_t src_flags; + uint32_t mask_flags; + uint32_t dest_flags; +} pixman_composite_info_t; + +#define PIXMAN_COMPOSITE_ARGS(info) \ + MAYBE_UNUSED pixman_op_t op = info->op; \ + MAYBE_UNUSED pixman_image_t * src_image = info->src_image; \ + MAYBE_UNUSED pixman_image_t * mask_image = info->mask_image; \ + MAYBE_UNUSED pixman_image_t * dest_image = info->dest_image; \ + MAYBE_UNUSED int32_t src_x = info->src_x; \ + MAYBE_UNUSED int32_t src_y = info->src_y; \ + MAYBE_UNUSED int32_t mask_x = info->mask_x; \ + MAYBE_UNUSED int32_t mask_y = info->mask_y; \ + MAYBE_UNUSED int32_t dest_x = info->dest_x; \ + MAYBE_UNUSED int32_t dest_y = info->dest_y; \ + MAYBE_UNUSED int32_t width = info->width; \ + MAYBE_UNUSED int32_t height = info->height + typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp, pixman_op_t op, uint32_t * dest, @@ -359,26 +423,15 @@ typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp, const uint32_t * mask, int width); -typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); +typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + 
float * dest, + const float * src, + const float * mask, + int n_pixels); typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); + pixman_composite_info_t *info); typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp, uint32_t * src_bits, uint32_t * dst_bits, @@ -388,8 +441,8 @@ typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height); typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, @@ -400,10 +453,12 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor); + uint32_t filler); +typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, + pixman_iter_t *iter); void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); -void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp); +void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); typedef struct { @@ -420,50 +475,46 @@ typedef struct struct pixman_implementation_t { pixman_implementation_t * toplevel; - pixman_implementation_t * delegate; + pixman_implementation_t * fallback; const pixman_fast_path_t * fast_paths; pixman_blt_func_t blt; pixman_fill_func_t fill; + pixman_iter_init_func_t src_iter_init; + pixman_iter_init_func_t dest_iter_init; pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; - pixman_combine_64_func_t combine_64[PIXMAN_N_OPERATORS]; - pixman_combine_64_func_t combine_64_ca[PIXMAN_N_OPERATORS]; + pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS]; + pixman_combine_float_func_t combine_float_ca[PIXMAN_N_OPERATORS]; }; +uint32_t +_pixman_image_get_solid (pixman_implementation_t *imp, + pixman_image_t * image, + pixman_format_code_t format); + pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *delegate, +_pixman_implementation_create (pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths); void -_pixman_implementation_combine_32 (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); -void -_pixman_implementation_combine_64 (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); -void -_pixman_implementation_combine_32_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); -void -_pixman_implementation_combine_64_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); +_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func); + +pixman_combine_32_func_t +_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, + pixman_op_t op, + pixman_bool_t 
component_alpha, + pixman_bool_t wide); pixman_bool_t _pixman_implementation_blt (pixman_implementation_t *imp, @@ -475,8 +526,8 @@ _pixman_implementation_blt (pixman_implementation_t *imp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height); @@ -489,48 +540,112 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor); + uint32_t filler); + +pixman_bool_t +_pixman_implementation_src_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); + +pixman_bool_t +_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); /* Specific implementations */ pixman_implementation_t * _pixman_implementation_create_general (void); pixman_implementation_t * -_pixman_implementation_create_fast_path (void); +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback); -#ifdef USE_MMX pixman_implementation_t * -_pixman_implementation_create_mmx (void); +_pixman_implementation_create_noop (pixman_implementation_t *fallback); + +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI +pixman_implementation_t * +_pixman_implementation_create_mmx (pixman_implementation_t *fallback); #endif #ifdef USE_SSE2 pixman_implementation_t * -_pixman_implementation_create_sse2 (void); +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback); #endif #ifdef USE_ARM_SIMD pixman_implementation_t * -_pixman_implementation_create_arm_simd (void); +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); #endif #ifdef USE_ARM_NEON pixman_implementation_t * -_pixman_implementation_create_arm_neon (void); +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); +#endif + +#ifdef USE_MIPS_DSPR2 +pixman_implementation_t * +_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback); #endif #ifdef USE_VMX pixman_implementation_t * -_pixman_implementation_create_vmx (void); +_pixman_implementation_create_vmx (pixman_implementation_t *fallback); #endif +pixman_bool_t +_pixman_implementation_disabled (const char *name); + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_arm_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_ppc_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_mips_get_implementations (pixman_implementation_t *imp); + pixman_implementation_t * _pixman_choose_implementation (void); +pixman_bool_t +_pixman_disabled (const char *name); /* * Utilities */ +pixman_bool_t +_pixman_compute_composite_region32 (pixman_region32_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); +uint32_t * +_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); /* These "formats" all have depth 0, so they * will never clash with any real ones @@ -558,14 +673,19 @@ _pixman_choose_implementation (void); #define FAST_PATH_NEAREST_FILTER (1 << 11) #define 
FAST_PATH_HAS_TRANSFORM (1 << 12) #define FAST_PATH_IS_OPAQUE (1 << 13) -#define FAST_PATH_NEEDS_WORKAROUND (1 << 14) +#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14) #define FAST_PATH_NO_NONE_REPEAT (1 << 15) -#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16) -#define FAST_PATH_X_UNIT_POSITIVE (1 << 17) -#define FAST_PATH_AFFINE_TRANSFORM (1 << 18) -#define FAST_PATH_Y_UNIT_ZERO (1 << 19) -#define FAST_PATH_BILINEAR_FILTER (1 << 20) -#define FAST_PATH_NO_NORMAL_REPEAT (1 << 21) +#define FAST_PATH_X_UNIT_POSITIVE (1 << 16) +#define FAST_PATH_AFFINE_TRANSFORM (1 << 17) +#define FAST_PATH_Y_UNIT_ZERO (1 << 18) +#define FAST_PATH_BILINEAR_FILTER (1 << 19) +#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20) +#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21) +#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22) +#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23) +#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) +#define FAST_PATH_BITS_IMAGE (1 << 25) +#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26) #define FAST_PATH_PAD_REPEAT \ (FAST_PATH_NO_NONE_REPEAT | \ @@ -601,7 +721,7 @@ _pixman_choose_implementation (void); #define SOURCE_FLAGS(format) \ (FAST_PATH_STANDARD_FLAGS | \ ((PIXMAN_ ## format == PIXMAN_solid) ? \ - 0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM))) + 0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM))) #define MASK_FLAGS(format, extra) \ ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra)) @@ -632,6 +752,24 @@ _pixman_choose_implementation (void); dest, FAST_PATH_STD_DEST_FLAGS, \ func) } +extern pixman_implementation_t *global_implementation; + +static force_inline pixman_implementation_t * +get_implementation (void) +{ +#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR + if (!global_implementation) + global_implementation = _pixman_choose_implementation (); +#endif + return global_implementation; +} + +/* This function is exported for the sake of the test suite and not part + * of the ABI. 
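The FAST_PATH_* values above are requirement bitmasks: a fast path is usable only when every flag it demands is present in the flags computed for the actual source, mask and destination. A minimal sketch of that subset test (the two low-bit flag values are samples for the demo, defined earlier in the real header; only the bit-25 value is taken from the list above):

#include <stdint.h>
#include <stdio.h>

#define DEMO_FAST_PATH_ID_TRANSFORM  (1 << 0)
#define DEMO_FAST_PATH_NO_ALPHA_MAP  (1 << 1)
#define DEMO_FAST_PATH_BITS_IMAGE    (1 << 25)

/* a fast path matches when the operation's flags are a superset of
 * the flags the path requires */
static int
flags_satisfy (uint32_t have, uint32_t need)
{
    return (have & need) == need;
}

int
main (void)
{
    uint32_t have = DEMO_FAST_PATH_ID_TRANSFORM | DEMO_FAST_PATH_NO_ALPHA_MAP;

    printf ("%d\n", flags_satisfy (have, DEMO_FAST_PATH_ID_TRANSFORM)); /* 1 */
    printf ("%d\n", flags_satisfy (have, DEMO_FAST_PATH_BITS_IMAGE));   /* 0 */
    return 0;
}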
+ */ +PIXMAN_EXPORT pixman_implementation_t * +_pixman_internal_only_get_implementation (void); + /* Memory allocation helpers */ void * pixman_malloc_ab (unsigned int n, unsigned int b); @@ -640,23 +778,25 @@ void * pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); pixman_bool_t -pixman_multiply_overflows_int (unsigned int a, unsigned int b); +_pixman_multiply_overflows_size (size_t a, size_t b); pixman_bool_t -pixman_addition_overflows_int (unsigned int a, unsigned int b); +_pixman_multiply_overflows_int (unsigned int a, unsigned int b); + +pixman_bool_t +_pixman_addition_overflows_int (unsigned int a, unsigned int b); /* Compositing utilities */ void -pixman_expand (uint64_t * dst, - const uint32_t * src, - pixman_format_code_t format, - int width); +pixman_expand_to_float (argb_t *dst, + const uint32_t *src, + pixman_format_code_t format, + int width); void -pixman_contract (uint32_t * dst, - const uint64_t *src, - int width); - +pixman_contract_from_float (uint32_t *dst, + const argb_t *src, + int width); /* Region Helpers */ pixman_bool_t @@ -667,6 +807,50 @@ pixman_bool_t pixman_region16_copy_from_region32 (pixman_region16_t *dst, pixman_region32_t *src); +/* Doubly linked lists */ +typedef struct pixman_link_t pixman_link_t; +struct pixman_link_t +{ + pixman_link_t *next; + pixman_link_t *prev; +}; + +typedef struct pixman_list_t pixman_list_t; +struct pixman_list_t +{ + pixman_link_t *head; + pixman_link_t *tail; +}; + +static force_inline void +pixman_list_init (pixman_list_t *list) +{ + list->head = (pixman_link_t *)list; + list->tail = (pixman_link_t *)list; +} + +static force_inline void +pixman_list_prepend (pixman_list_t *list, pixman_link_t *link) +{ + link->next = list->head; + link->prev = (pixman_link_t *)list; + list->head->prev = link; + list->head = link; +} + +static force_inline void +pixman_list_unlink (pixman_link_t *link) +{ + link->prev->next = link->next; + link->next->prev = link->prev; +} + +static force_inline void +pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link) +{ + pixman_list_unlink (link); + pixman_list_prepend (list, link); +} /* Misc macros */ @@ -696,29 +880,62 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst, #define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? 
(high) : (v))) +#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) + /* Conversion between 8888 and 0565 */ -#define CONVERT_8888_TO_0565(s) \ - ((((s) >> 3) & 0x001f) | \ - (((s) >> 5) & 0x07e0) | \ - (((s) >> 8) & 0xf800)) +static force_inline uint16_t +convert_8888_to_0565 (uint32_t s) +{ + /* The following code can be compiled into just 4 instructions on ARM */ + uint32_t a, b; + a = (s >> 3) & 0x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return (uint16_t)a; +} -#define CONVERT_0565_TO_0888(s) \ - (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ - ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ - ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) +static force_inline uint32_t +convert_0565_to_0888 (uint16_t s) +{ + return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | + ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | + ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); +} -#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000) +static force_inline uint32_t +convert_0565_to_8888 (uint16_t s) +{ + return convert_0565_to_0888 (s) | 0xff000000; +} /* Trivial versions that are useful in macros */ -#define CONVERT_8888_TO_8888(s) (s) -#define CONVERT_0565_TO_0565(s) (s) + +static force_inline uint32_t +convert_8888_to_8888 (uint32_t s) +{ + return s; +} + +static force_inline uint32_t +convert_x888_to_8888 (uint32_t s) +{ + return s | 0xff000000; +} + +static force_inline uint16_t +convert_0565_to_0565 (uint16_t s) +{ + return s; +} #define PIXMAN_FORMAT_IS_WIDE(f) \ (PIXMAN_FORMAT_A (f) > 8 || \ PIXMAN_FORMAT_R (f) > 8 || \ PIXMAN_FORMAT_G (f) > 8 || \ - PIXMAN_FORMAT_B (f) > 8) + PIXMAN_FORMAT_B (f) > 8 || \ + PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB) #ifdef WORDS_BIGENDIAN # define SCREEN_SHIFT_LEFT(x,n) ((x) << (n)) @@ -728,6 +945,52 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst, # define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n)) #endif +static force_inline uint32_t +unorm_to_unorm (uint32_t val, int from_bits, int to_bits) +{ + uint32_t result; + + if (from_bits == 0) + return 0; + + /* Delete any extra bits */ + val &= ((1 << from_bits) - 1); + + if (from_bits >= to_bits) + return val >> (from_bits - to_bits); + + /* Start out with the high bit of val in the high bit of result. */ + result = val << (to_bits - from_bits); + + /* Copy the bits in result, doubling the number of bits each time, until + * we fill all to_bits. Unrolled manually because from_bits and to_bits + * are usually known statically, so the compiler can turn all of this + * into a few shifts. 
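convert_8888_to_0565() above masks red and blue into one register so both can be reduced with a single shift-and-or, and the 0565-to-0888 direction replicates each channel's high bits into the freed low bits so full intensity survives the round trip (0xff comes back as 0xff, not 0xf8). A standalone copy of both routines for checking that behaviour; the demo_ prefix is ours:

#include <stdint.h>
#include <stdio.h>

static uint16_t
demo_8888_to_0565 (uint32_t s)
{
    uint32_t a, b;

    a = (s >> 3) & 0x1F001F;    /* r5 in bits 16-20, b5 in bits 0-4 */
    b = s & 0xFC00;             /* g6 still in bits 10-15 */
    a |= a >> 5;                /* fold r5 down to bits 11-15 */
    a |= b >> 5;                /* move g6 to bits 5-10 */

    return (uint16_t)a;         /* rrrrrggggggbbbbb */
}

static uint32_t
demo_0565_to_8888 (uint16_t s)
{
    /* each channel: high bits shifted up, then replicated into the
     * low bits left empty by the widening */
    return ((((s << 3) & 0xf8) | ((s >> 2) & 0x7)) |
            (((s << 5) & 0xfc00) | ((s >> 1) & 0x300)) |
            (((s << 8) & 0xf80000) | ((s << 3) & 0x70000))) | 0xff000000;
}

int
main (void)
{
    printf ("%04x\n", demo_8888_to_0565 (0xffffffff)); /* ffff */
    printf ("%08x\n", demo_0565_to_8888 (0xffff));     /* ffffffff */
    return 0;
}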
+ */ +#define REPLICATE() \ + do \ + { \ + if (from_bits < to_bits) \ + { \ + result |= result >> from_bits; \ + \ + from_bits *= 2; \ + } \ + } \ + while (0) + + REPLICATE(); + REPLICATE(); + REPLICATE(); + REPLICATE(); + REPLICATE(); + + return result; +} + +uint16_t pixman_float_to_unorm (float f, int n_bits); +float pixman_unorm_to_float (uint16_t u, int n_bits); + /* * Various debugging code */ @@ -754,15 +1017,13 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst, #endif -#ifdef DEBUG - void _pixman_log_error (const char *function, const char *message); #define return_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return; \ @@ -773,7 +1034,7 @@ _pixman_log_error (const char *function, const char *message); #define return_val_if_fail(expr, retval) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return (retval); \ @@ -784,38 +1045,31 @@ _pixman_log_error (const char *function, const char *message); #define critical_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ } \ while (0) +/* + * Matrix + */ -#else +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; -#define _pixman_log_error(f,m) do { } while (0) \ +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); -#define return_if_fail(expr) \ - do \ - { \ - if (!(expr)) \ - return; \ - } \ - while (0) +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (!(expr)) \ - return (retval); \ - } \ - while (0) - -#define critical_if_fail(expr) \ - do \ - { \ - } \ - while (0) -#endif +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); /* * Timers @@ -826,10 +1080,11 @@ _pixman_log_error (const char *function, const char *message); static inline uint64_t oil_profile_stamp_rdtsc (void) { - uint64_t ts; + uint32_t hi, lo; - __asm__ __volatile__ ("rdtsc\n" : "=A" (ts)); - return ts; + __asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi)); + + return lo | (((uint64_t)hi) << 32); } #define OIL_STAMP oil_profile_stamp_rdtsc @@ -868,6 +1123,13 @@ void pixman_timer_register (pixman_timer_t *timer); timer ## tname.total += OIL_STAMP () - begin ## tname; \ } +#else + +#define TIMER_BEGIN(tname) +#define TIMER_END(tname) + #endif /* PIXMAN_TIMERS */ +#endif /* __ASSEMBLER__ */ + #endif /* PIXMAN_PRIVATE_H */ diff --git a/programs/develop/libraries/pixman/pixman-radial-gradient.c b/programs/develop/libraries/pixman/pixman-radial-gradient.c index fa5725840f..6a217963da 100644 --- a/programs/develop/libraries/pixman/pixman-radial-gradient.c +++ b/programs/develop/libraries/pixman/pixman-radial-gradient.c @@ -78,11 +78,11 @@ radial_compute_color (double a, { /* * In this function error propagation can lead to bad results: - * - det can have an unbound error (if b*b-a*c is very small), + * - discr can have an unbound error (if b*b-a*c is very small), * potentially making it the opposite sign of what it should have been * (thus clearing a pixel that would have been colored or vice-versa) - * or propagating the error to sqrtdet; - * if det has the wrong sign or 
b is very small, this can lead to bad + * or propagating the error to sqrtdiscr; + * if discr has the wrong sign or b is very small, this can lead to bad * results * * - the algorithm used to compute the solutions of the quadratic @@ -92,7 +92,7 @@ radial_compute_color (double a, * * - the above problems are worse if a is small (as inva becomes bigger) */ - double det; + double discr; if (a == 0) { @@ -109,22 +109,33 @@ radial_compute_color (double a, } else { - if (t * dr > mindr) + if (t * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t); } return 0; } - det = fdot (b, a, 0, b, -c, 0); - if (det >= 0) + discr = fdot (b, a, 0, b, -c, 0); + if (discr >= 0) { - double sqrtdet, t0, t1; + double sqrtdiscr, t0, t1; - sqrtdet = sqrt (det); - t0 = (b + sqrtdet) * inva; - t1 = (b - sqrtdet) * inva; + sqrtdiscr = sqrt (discr); + t0 = (b + sqrtdiscr) * inva; + t1 = (b - sqrtdiscr) * inva; + /* + * The root that must be used is the biggest one that belongs + * to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any + * solution that results in a positive radius otherwise). + * + * If a > 0, t0 is the biggest solution, so if it is valid, it + * is the correct result. + * + * If a < 0, only one of the solutions can be valid, so the + * order in which they are tested is not important. + */ if (repeat == PIXMAN_REPEAT_NONE) { if (0 <= t0 && t0 <= pixman_fixed_1) @@ -134,9 +145,9 @@ radial_compute_color (double a, } else { - if (t0 * dr > mindr) + if (t0 * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t0); - else if (t1 * dr > mindr) + else if (t1 * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t1); } } @@ -144,19 +155,14 @@ radial_compute_color (double a, return 0; } -static void -radial_gradient_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static uint32_t * +radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) { /* * Implementation of radial gradients following the PDF specification. * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference * Manual (PDF 32000-1:2008 at the time of this writing). - * + * * In the radial gradient problem we are given two circles (c₁,r₁) and * (c₂,r₂) that define the gradient itself. * @@ -173,7 +179,7 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, * * The graphical result is the same as drawing the valid (radius > 0) * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient - * is not repeated) using SOURCE operatior composition. + * is not repeated) using SOURCE operator composition. 
* * It looks like a cone pointing towards the viewer if the ending circle * is smaller than the starting one, a cone pointing inside the page if @@ -184,14 +190,14 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, * in, compute the t values for that point, solving for t in: * * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂ - * + * * Let's rewrite it in a simpler way, by defining some auxiliary * variables: * * cd = c₂ - c₁ * pd = p - c₁ * dr = r₂ - r₁ - * lenght(t·cd - pd) = r₁ + t·dr + * length(t·cd - pd) = r₁ + t·dr * * which actually means * @@ -217,7 +223,7 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, * B = pdx·cdx + pdy·cdy + r₁·dr * C = pdx² + pdy² - r₁² * At² - 2Bt + C = 0 - * + * * The solutions (unless the equation degenerates because of A = 0) are: * * t = (B ± ⎷(B² - A·C)) / A @@ -233,9 +239,13 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, * <=> for every p, the radiuses associated with the two t solutions * have opposite sign */ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t *buffer = iter->buffer; gradient_t *gradient = (gradient_t *)image; - source_image_t *source = (source_image_t *)image; radial_gradient_t *radial = (radial_gradient_t *)image; uint32_t *end = buffer + width; pixman_gradient_walker_t walker; @@ -246,16 +256,16 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; - _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); + _pixman_gradient_walker_init (&walker, gradient, image->common.repeat); - if (source->common.transform) + if (image->common.transform) { - if (!pixman_transform_point_3d (source->common.transform, &v)) - return; - - unit.vector[0] = source->common.transform->matrix[0][0]; - unit.vector[1] = source->common.transform->matrix[1][0]; - unit.vector[2] = source->common.transform->matrix[2][0]; + if (!pixman_transform_point_3d (image->common.transform, &v)) + return iter->buffer; + + unit.vector[0] = image->common.transform->matrix[0][0]; + unit.vector[1] = image->common.transform->matrix[1][0]; + unit.vector[2] = image->common.transform->matrix[2][0]; } else { @@ -325,7 +335,7 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, radial->delta.radius, radial->mindr, &walker, - source->common.repeat); + image->common.repeat); } b += db; @@ -370,14 +380,14 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, radial->delta.radius, radial->mindr, &walker, - source->common.repeat); + image->common.repeat); } else { *buffer = 0; } } - + ++buffer; v.vector[0] += unit.vector[0]; @@ -385,18 +395,34 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, v.vector[2] += unit.vector[2]; } } + + iter->y++; + return iter->buffer; } -static void -radial_gradient_property_changed (pixman_image_t *image) +static uint32_t * +radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { - image->common.get_scanline_32 = radial_gradient_get_scanline_32; - image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; + uint32_t *buffer = radial_get_scanline_narrow (iter, NULL); + + pixman_expand_to_float ( + (argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width); + + return buffer; +} + +void +_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->iter_flags & ITER_NARROW) + iter->get_scanline = radial_get_scanline_narrow; + else + iter->get_scanline = radial_get_scanline_wide; } 
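To make the quadratic above concrete, here is a numeric check of t = (B ± √(B² − A·C)) / A with invented circle values (not from pixman): c₁ = (0,0) with r₁ = 0.25, c₂ = (1,0) with r₂ = 0.5, sampled at p = (0.5, 0). Both roots name circles that genuinely pass through p, and the larger root t0 is the one preferred when a > 0, as radial_compute_color's comment explains:

#include <math.h>
#include <stdio.h>

int
main (void)
{
    double cdx = 1.0, cdy = 0.0, dr = 0.5 - 0.25;
    double pdx = 0.5, pdy = 0.0, r1 = 0.25;

    double A = cdx * cdx + cdy * cdy - dr * dr;   /* 0.9375 */
    double B = pdx * cdx + pdy * cdy + r1 * dr;   /* 0.5625 */
    double C = pdx * pdx + pdy * pdy - r1 * r1;   /* 0.1875 */

    double sqrtdiscr = sqrt (B * B - A * C);      /* 0.375  */
    double t0 = (B + sqrtdiscr) / A;              /* 1.0    */
    double t1 = (B - sqrtdiscr) / A;              /* 0.2    */

    /* for each root, the distance from p to the interpolated center
     * (t·cd) equals the interpolated radius r1 + t·dr: 0.5 and 0.3 */
    printf ("t0 = %g (radius %g), t1 = %g (radius %g)\n",
            t0, r1 + t0 * dr, t1, r1 + t1 * dr);
    return 0;
}

(Distance check: for t0 the center is (1,0) and |p − c| = 0.5 = r₁ + t0·dr; for t1 the center is (0.2,0) and |p − c| = 0.3 = r₁ + t1·dr.)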
PIXMAN_EXPORT pixman_image_t * -pixman_image_create_radial_gradient (pixman_point_fixed_t * inner, - pixman_point_fixed_t * outer, +pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner, + const pixman_point_fixed_t * outer, pixman_fixed_t inner_radius, pixman_fixed_t outer_radius, const pixman_gradient_stop_t *stops, @@ -441,8 +467,5 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t * inner, radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius; - image->common.property_changed = radial_gradient_property_changed; - return image; } - diff --git a/programs/develop/libraries/pixman/pixman-region.c b/programs/develop/libraries/pixman/pixman-region.c index 4e7c8db75e..59bc9c7971 100644 --- a/programs/develop/libraries/pixman/pixman-region.c +++ b/programs/develop/libraries/pixman/pixman-region.c @@ -102,7 +102,11 @@ static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; +#if defined (__llvm__) && !defined (__clang__) +static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#else static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#endif static box_type_t *pixman_region_empty_box = (box_type_t *)&PREFIX (_empty_box_); @@ -198,7 +202,7 @@ PIXREGION_SZOF (size_t n) return size + sizeof(region_data_type_t); } -static void * +static region_data_type_t * alloc_data (size_t n) { size_t sz = PIXREGION_SZOF (n); @@ -738,8 +742,7 @@ typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region, box_type_t * r2, box_type_t * r2_end, int y1, - int y2, - int * overlap); + int y2); static pixman_bool_t pixman_op (region_type_t * new_reg, /* Place to store result */ @@ -750,10 +753,10 @@ pixman_op (region_type_t * new_reg, /* Place to store result int append_non1, /* Append non-overlapping bands * in region 1 ? */ - int append_non2, /* Append non-overlapping bands + int append_non2 /* Append non-overlapping bands * in region 2 ? 
*/ - int * overlap) + ) { box_type_t *r1; /* Pointer into first region */ box_type_t *r2; /* Pointer into 2d region */ @@ -824,8 +827,7 @@ pixman_op (region_type_t * new_reg, /* Place to store result { if (!pixman_rect_alloc (new_reg, new_size)) { - if (old_data) - free (old_data); + free (old_data); return FALSE; } } @@ -932,8 +934,7 @@ pixman_op (region_type_t * new_reg, /* Place to store result if (!(*overlap_func)(new_reg, r1, r1_band_end, r2, r2_band_end, - ytop, ybot, - overlap)) + ytop, ybot)) { goto bail; } @@ -1001,8 +1002,7 @@ pixman_op (region_type_t * new_reg, /* Place to store result APPEND_REGIONS (new_reg, r2_band_end, r2_end); } - if (old_data) - free (old_data); + free (old_data); if (!(numRects = new_reg->data->numRects)) { @@ -1023,8 +1023,7 @@ pixman_op (region_type_t * new_reg, /* Place to store result return TRUE; bail: - if (old_data) - free (old_data); + free (old_data); return pixman_break (new_reg); } @@ -1112,8 +1111,7 @@ pixman_region_intersect_o (region_type_t *region, box_type_t * r2, box_type_t * r2_end, int y1, - int y2, - int * overlap) + int y2) { int x1; int x2; @@ -1209,13 +1207,9 @@ PREFIX (_intersect) (region_type_t * new_reg, else { /* General purpose intersection */ - int overlap; /* result ignored */ - if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE, - &overlap)) - { + if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE)) return FALSE; - } pixman_set_extents (new_reg); } @@ -1230,9 +1224,6 @@ PREFIX (_intersect) (region_type_t * new_reg, if (r->x1 <= x2) \ { \ /* Merge with current rectangle */ \ - if (r->x1 < x2) \ - *overlap = TRUE; \ - \ if (x2 < r->x2) \ x2 = r->x2; \ } \ @@ -1272,8 +1263,7 @@ pixman_region_union_o (region_type_t *region, box_type_t * r2, box_type_t * r2_end, int y1, - int y2, - int * overlap) + int y2) { box_type_t *next_rect; int x1; /* left and right side of current union */ @@ -1382,8 +1372,6 @@ PREFIX (_union) (region_type_t *new_reg, region_type_t *reg1, region_type_t *reg2) { - int overlap; /* result ignored */ - /* Return TRUE if some overlap * between reg1, reg2 */ @@ -1449,7 +1437,7 @@ PREFIX (_union) (region_type_t *new_reg, return TRUE; } - if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE, &overlap)) + if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE)) return FALSE; new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1); @@ -1516,9 +1504,7 @@ quick_sort_rects ( r++; i++; } - - while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))) - ; + while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))); r = &(rects[j]); do @@ -1579,8 +1565,7 @@ quick_sort_rects ( */ static pixman_bool_t -validate (region_type_t * badreg, - int * overlap) +validate (region_type_t * badreg) { /* Descriptor for regions under construction in Step 2. 
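pixman_op() above is the band-sweep engine underneath every public region operation; this patch drops its overlap out-parameter because no caller consumed the result. For orientation, a small usage sketch against the public API (build with pkg-config pixman-1) showing the banding it produces: the L-shaped union of two overlapping squares is stored as three y-bands, hence three boxes:

#include <pixman.h>
#include <stdio.h>

int
main (void)
{
    pixman_region32_t a, b, u;

    pixman_region32_init_rect (&a, 0, 0, 10, 10);  /* box (0,0)-(10,10) */
    pixman_region32_init_rect (&b, 5, 5, 10, 10);  /* box (5,5)-(15,15) */
    pixman_region32_init (&u);

    /* runs pixman_op() with pixman_region_union_o underneath */
    pixman_region32_union (&u, &a, &b);

    /* bands y 0-5, y 5-10 (x-coalesced to one box) and y 10-15 */
    printf ("%d boxes\n", pixman_region32_n_rects (&u));   /* 3 */

    pixman_region32_fini (&a);
    pixman_region32_fini (&b);
    pixman_region32_fini (&u);
    return 0;
}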
*/ typedef struct @@ -1605,7 +1590,6 @@ validate (region_type_t * badreg, region_type_t *hreg; /* ri[j_half].reg */ pixman_bool_t ret = TRUE; - *overlap = FALSE; if (!badreg->data) { GOOD (badreg); @@ -1679,9 +1663,6 @@ validate (region_type_t * badreg, if (box->x1 <= ri_box->x2) { /* Merge it with ri_box */ - if (box->x1 < ri_box->x2) - *overlap = TRUE; - if (box->x2 > ri_box->x2) ri_box->x2 = box->x2; } @@ -1785,7 +1766,7 @@ validate (region_type_t * badreg, reg = &ri[j].reg; hreg = &ri[j + half].reg; - if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE, overlap)) + if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE)) ret = FALSE; if (hreg->extents.x1 < reg->extents.x1) @@ -1853,8 +1834,7 @@ pixman_region_subtract_o (region_type_t * region, box_type_t * r2, box_type_t * r2_end, int y1, - int y2, - int * overlap) + int y2) { box_type_t * next_rect; int x1; @@ -1878,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region, else if (r2->x1 <= x1) { /* - * Subtrahend preceeds minuend: nuke left edge of minuend. + * Subtrahend precedes minuend: nuke left edge of minuend. */ x1 = r2->x2; if (x1 >= r1->x2) @@ -1978,8 +1958,6 @@ PREFIX (_subtract) (region_type_t *reg_d, region_type_t *reg_m, region_type_t *reg_s) { - int overlap; /* result ignored */ - GOOD (reg_m); GOOD (reg_s); GOOD (reg_d); @@ -2004,9 +1982,9 @@ PREFIX (_subtract) (region_type_t *reg_d, } /* Add those rectangles in region 1 that aren't in region 2, - do yucky substraction for overlaps, and + do yucky subtraction for overlaps, and just throw away rectangles in region 2 that aren't in region 1 */ - if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE, &overlap)) + if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; /* @@ -2040,15 +2018,13 @@ PREFIX (_subtract) (region_type_t *reg_d, * *----------------------------------------------------------------------- */ -pixman_bool_t -PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ - region_type_t *reg1, /* Region to invert */ - box_type_t * inv_rect) /* Bounding box for inversion */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ + region_type_t *reg1, /* Region to invert */ + box_type_t * inv_rect) /* Bounding box for inversion */ { region_type_t inv_reg; /* Quick and dirty region made from the * bounding box */ - int overlap; /* result ignored */ - GOOD (reg1); GOOD (new_reg); @@ -2066,12 +2042,12 @@ PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region } /* Add those rectangles in region 1 that aren't in region 2, - * do yucky substraction for overlaps, and + * do yucky subtraction for overlaps, and * just throw away rectangles in region 2 that aren't in region 1 */ inv_reg.extents = *inv_rect; inv_reg.data = (region_data_type_t *)NULL; - if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE, &overlap)) + if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; /* @@ -2086,6 +2062,40 @@ PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region return TRUE; } +/* In time O(log n), locate the first box whose y2 is greater than y. + * Return @end if no such box exists. 
+ */ +static box_type_t * +find_box_for_y (box_type_t *begin, box_type_t *end, int y) +{ + box_type_t *mid; + + if (end == begin) + return end; + + if (end - begin == 1) + { + if (begin->y2 > y) + return begin; + else + return end; + } + + mid = begin + (end - begin) / 2; + if (mid->y2 > y) + { + /* If no box is found in [begin, mid], the function + * will return @mid, which is then known to be the + * correct answer. + */ + return find_box_for_y (begin, mid, y); + } + else + { + return find_box_for_y (mid, end, y); + } +} + /* * rect_in(region, rect) * This routine takes a pointer to a region and a pointer to a box @@ -2102,10 +2112,9 @@ PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region * partially in the region) or is outside the region (we reached a band * that doesn't overlap the box at all and part_in is false) */ - -pixman_region_overlap_t -PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region, - box_type_t * prect) +PIXMAN_EXPORT pixman_region_overlap_t +PREFIX (_contains_rectangle) (region_type_t * region, + box_type_t * prect) { box_type_t * pbox; box_type_t * pbox_end; @@ -2139,12 +2148,15 @@ PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region, /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */ for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; - pbox != pbox_end; - pbox++) + pbox != pbox_end; + pbox++) { - - if (pbox->y2 <= y) - continue; /* getting up to speed or skipping remainder of band */ + /* getting up to speed or skipping remainder of band */ + if (pbox->y2 <= y) + { + if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end) + break; + } if (pbox->y1 > y) { @@ -2319,6 +2331,16 @@ PREFIX (_reset) (region_type_t *region, box_type_t *box) region->data = NULL; } +PIXMAN_EXPORT void +PREFIX (_clear) (region_type_t *region) +{ + GOOD (region); + FREE_DATA (region); + + region->extents = *pixman_region_empty_box; + region->data = pixman_region_empty_data; +} + /* box is "return" value */ PIXMAN_EXPORT int PREFIX (_contains_point) (region_type_t * region, @@ -2342,13 +2364,13 @@ PREFIX (_contains_point) (region_type_t * region, return(TRUE); } - for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; - pbox != pbox_end; - pbox++) - { - if (y >= pbox->y2) - continue; /* not there yet */ + pbox = PIXREGION_BOXPTR (region); + pbox_end = pbox + numRects; + pbox = find_box_for_y (pbox, pbox_end, y); + + for (;pbox != pbox_end; pbox++) + { if ((y < pbox->y1) || (x < pbox->x1)) break; /* missed it */ @@ -2528,7 +2550,7 @@ PREFIX (_init_rects) (region_type_t *region, /* Validate */ region->extents.x1 = region->extents.x2 = 0; - return validate (region, &i); + return validate (region); } #define READ(_ptr) (*(_ptr)) @@ -2545,8 +2567,7 @@ bitmap_addrect (region_type_t *reg, ((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) && ((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2)))) { - if (!reg->data || - reg->data->numRects == reg->data->size) + if (reg->data->numRects == reg->data->size) { if (!pixman_rect_alloc (reg, 1)) return NULL; @@ -2590,6 +2611,8 @@ PREFIX (_init_from_image) (region_type_t *region, PREFIX(_init) (region); + critical_if_fail (region->data); + return_if_fail (image->type == BITS); return_if_fail (image->bits.format == PIXMAN_a1); diff --git a/programs/develop/libraries/pixman/pixman-solid-fill.c b/programs/develop/libraries/pixman/pixman-solid-fill.c index 1d911e99d9..5f9fef6306 100644 --- a/programs/develop/libraries/pixman/pixman-solid-fill.c +++ 
b/programs/develop/libraries/pixman/pixman-solid-fill.c @@ -26,56 +26,6 @@ #endif #include "pixman-private.h" -static void -solid_fill_get_scanline_32 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - uint32_t *end = buffer + width; - uint32_t color = image->solid.color_32; - - while (buffer < end) - *(buffer++) = color; - - return; -} - -static void -solid_fill_get_scanline_64 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - uint64_t *b = (uint64_t *)buffer; - uint64_t *e = b + width; - uint64_t color = image->solid.color_64; - - while (b < e) - *(b++) = color; -} - -static source_image_class_t -solid_fill_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) -{ - return SOURCE_IMAGE_CLASS_HORIZONTAL; -} - -static void -solid_fill_property_changed (pixman_image_t *image) -{ - image->common.get_scanline_32 = solid_fill_get_scanline_32; - image->common.get_scanline_64 = solid_fill_get_scanline_64; -} - static uint32_t color_to_uint32 (const pixman_color_t *color) { @@ -86,18 +36,21 @@ color_to_uint32 (const pixman_color_t *color) (color->blue >> 8); } -static uint64_t -color_to_uint64 (const pixman_color_t *color) +static argb_t +color_to_float (const pixman_color_t *color) { - return - ((uint64_t)color->alpha << 48) | - ((uint64_t)color->red << 32) | - ((uint64_t)color->green << 16) | - ((uint64_t)color->blue); + argb_t result; + + result.a = pixman_unorm_to_float (color->alpha, 16); + result.r = pixman_unorm_to_float (color->red, 16); + result.g = pixman_unorm_to_float (color->green, 16); + result.b = pixman_unorm_to_float (color->blue, 16); + + return result; } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_solid_fill (pixman_color_t *color) +pixman_image_create_solid_fill (const pixman_color_t *color) { pixman_image_t *img = _pixman_image_allocate (); @@ -107,10 +60,7 @@ pixman_image_create_solid_fill (pixman_color_t *color) img->type = SOLID; img->solid.color = *color; img->solid.color_32 = color_to_uint32 (color); - img->solid.color_64 = color_to_uint64 (color); - - img->common.classify = solid_fill_classify; - img->common.property_changed = solid_fill_property_changed; + img->solid.color_float = color_to_float (color); return img; } diff --git a/programs/develop/libraries/pixman/pixman-sse2.c b/programs/develop/libraries/pixman/pixman-sse2.c new file mode 100644 index 0000000000..863bc18ada --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-sse2.c @@ -0,0 +1,6449 @@ +/* + * Copyright © 2008 Rodrigo Kumpera + * Copyright © 2008 André Tupinambá + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Rodrigo Kumpera (kumpera@gmail.com) + * André Tupinambá (andrelrt@gmail.com) + * + * Based on work by Owen Taylor and Søren Sandmann + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ +#include <emmintrin.h> /* for SSE2 intrinsics */ +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +static __m128i mask_0080; +static __m128i mask_00ff; +static __m128i mask_0101; +static __m128i mask_ffff; +static __m128i mask_ff000000; +static __m128i mask_alpha; + +static __m128i mask_565_r; +static __m128i mask_565_g1, mask_565_g2; +static __m128i mask_565_b; +static __m128i mask_red; +static __m128i mask_green; +static __m128i mask_blue; + +static __m128i mask_565_fix_rb; +static __m128i mask_565_fix_g; + +static __m128i mask_565_rb; +static __m128i mask_565_pack_multiplier; + +static force_inline __m128i +unpack_32_1x128 (uint32_t data) +{ + return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ()); +} + +static force_inline void +unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi) +{ + *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); + *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); +} + +static force_inline __m128i +unpack_565_to_8888 (__m128i lo) +{ + __m128i r, g, b, rb, t; + + r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red); + g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green); + b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue); + + rb = _mm_or_si128 (r, b); + t = _mm_and_si128 (rb, mask_565_fix_rb); + t = _mm_srli_epi32 (t, 5); + rb = _mm_or_si128 (rb, t); + + t = _mm_and_si128 (g, mask_565_fix_g); + t = _mm_srli_epi32 (t, 6); + g = _mm_or_si128 (g, t); + + return _mm_or_si128 (rb, g); +} + +static force_inline void +unpack_565_128_4x128 (__m128i data, + __m128i* data0, + __m128i* data1, + __m128i* data2, + __m128i* data3) +{ + __m128i lo, hi; + + lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); + hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); + + lo = unpack_565_to_8888 (lo); + hi = unpack_565_to_8888 (hi); + + unpack_128_2x128 (lo, data0, data1); + unpack_128_2x128 (hi, data2, data3); +} + +static force_inline uint16_t +pack_565_32_16 (uint32_t pixel) +{ + return (uint16_t) (((pixel >> 8) & 0xf800) | + ((pixel >> 5) & 0x07e0) | + ((pixel >> 3) & 0x001f)); +} + +static force_inline __m128i +pack_2x128_128 (__m128i lo, __m128i hi) +{ + return _mm_packus_epi16 (lo, hi); +} + +static force_inline __m128i +pack_565_2packedx128_128 (__m128i lo, __m128i hi) +{ + __m128i rb0 = _mm_and_si128 (lo, mask_565_rb); + __m128i rb1 = _mm_and_si128 (hi, mask_565_rb); + + __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier); + __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier); + + __m128i g0 = _mm_and_si128 (lo, mask_green); + __m128i g1 = _mm_and_si128 (hi, mask_green); + + t0 = _mm_or_si128 (t0, g0); + t1 = _mm_or_si128 (t1, g1); + + /* Simulates _mm_packus_epi32 */ + t0 = _mm_slli_epi32 (t0, 16 - 5); + t1 = _mm_slli_epi32 (t1, 16 - 5); +
t0 = _mm_srai_epi32 (t0, 16); + t1 = _mm_srai_epi32 (t1, 16); + return _mm_packs_epi32 (t0, t1); +} + +static force_inline __m128i +pack_565_2x128_128 (__m128i lo, __m128i hi) +{ + __m128i data; + __m128i r, g1, g2, b; + + data = pack_2x128_128 (lo, hi); + + r = _mm_and_si128 (data, mask_565_r); + g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1); + g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2); + b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b); + + return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b); +} + +static force_inline __m128i +pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) +{ + return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), + pack_565_2x128_128 (*xmm2, *xmm3)); +} + +static force_inline int +is_opaque (__m128i x) +{ + __m128i ffs = _mm_cmpeq_epi8 (x, x); + + return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888; +} + +static force_inline int +is_zero (__m128i x) +{ + return _mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff; +} + +static force_inline int +is_transparent (__m128i x) +{ + return (_mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888; +} + +static force_inline __m128i +expand_pixel_32_1x128 (uint32_t data) +{ + return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0)); +} + +static force_inline __m128i +expand_alpha_1x128 (__m128i data) +{ + return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, + _MM_SHUFFLE (3, 3, 3, 3)), + _MM_SHUFFLE (3, 3, 3, 3)); +} + +static force_inline void +expand_alpha_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3)); + + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3)); +} + +static force_inline void +expand_alpha_rev_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline void +pix_multiply_2x128 (__m128i* data_lo, + __m128i* data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* ret_lo, + __m128i* ret_hi) +{ + __m128i lo, hi; + + lo = _mm_mullo_epi16 (*data_lo, *alpha_lo); + hi = _mm_mullo_epi16 (*data_hi, *alpha_hi); + lo = _mm_adds_epu16 (lo, mask_0080); + hi = _mm_adds_epu16 (hi, mask_0080); + *ret_lo = _mm_mulhi_epu16 (lo, mask_0101); + *ret_hi = _mm_mulhi_epu16 (hi, mask_0101); +} + +static force_inline void +pix_add_multiply_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_dst_lo, + __m128i* alpha_dst_hi, + __m128i* dst_lo, + __m128i* dst_hi, + __m128i* alpha_src_lo, + __m128i* alpha_src_hi, + __m128i* ret_lo, + __m128i* ret_hi) +{ + __m128i t1_lo, t1_hi; + __m128i t2_lo, t2_hi; + + pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi); + pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi); + + *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo); + *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi); +} + +static force_inline void +negate_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* neg_lo, + __m128i* neg_hi) 
+{ + *neg_lo = _mm_xor_si128 (data_lo, mask_00ff); + *neg_hi = _mm_xor_si128 (data_hi, mask_00ff); +} + +static force_inline void +invert_colors_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* inv_lo, + __m128i* inv_hi) +{ + __m128i lo, hi; + + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2)); +} + +static force_inline void +over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i t1, t2; + + negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); + + pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); + + *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo); + *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi); +} + +static force_inline void +over_rev_non_pre_2x128 (__m128i src_lo, + __m128i src_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i lo, hi; + __m128i alpha_lo, alpha_hi; + + expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi); + + lo = _mm_or_si128 (alpha_lo, mask_alpha); + hi = _mm_or_si128 (alpha_hi, mask_alpha); + + invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi); + + pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi); + + over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi); +} + +static force_inline void +in_over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* mask_lo, + __m128i* mask_hi, + __m128i* dst_lo, + __m128i* dst_hi) +{ + __m128i s_lo, s_hi; + __m128i a_lo, a_hi; + + pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); + pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); + + over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); +} + +/* load 4 pixels from a 16-byte boundary aligned address */ +static force_inline __m128i +load_128_aligned (__m128i* src) +{ + return _mm_load_si128 (src); +} + +/* load 4 pixels from an unaligned address */ +static force_inline __m128i +load_128_unaligned (const __m128i* src) +{ + return _mm_loadu_si128 (src); +} + +/* save 4 pixels using Write Combining memory on a 16-byte + * boundary aligned address + */ +static force_inline void +save_128_write_combining (__m128i* dst, + __m128i data) +{ + _mm_stream_si128 (dst, data); +} + +/* save 4 pixels on a 16-byte boundary aligned address */ +static force_inline void +save_128_aligned (__m128i* dst, + __m128i data) +{ + _mm_store_si128 (dst, data); +} + +/* save 4 pixels on an unaligned address */ +static force_inline void +save_128_unaligned (__m128i* dst, + __m128i data) +{ + _mm_storeu_si128 (dst, data); +} + +static force_inline __m128i +load_32_1x128 (uint32_t data) +{ + return _mm_cvtsi32_si128 (data); +} + +static force_inline __m128i +expand_alpha_rev_1x128 (__m128i data) +{ + return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline __m128i +expand_pixel_8_1x128 (uint8_t data) +{ + return _mm_shufflelo_epi16 ( + unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0)); +} + +static force_inline __m128i +pix_multiply_1x128 (__m128i data, + __m128i alpha) +{ + return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha), + mask_0080), + mask_0101); +} + +static force_inline __m128i +pix_add_multiply_1x128 (__m128i* src, + __m128i* alpha_dst, + __m128i* dst, + __m128i* alpha_src) +{ + __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst); + __m128i t2 =
pix_multiply_1x128 (*dst, *alpha_src); + + return _mm_adds_epu8 (t1, t2); +} + +static force_inline __m128i +negate_1x128 (__m128i data) +{ + return _mm_xor_si128 (data, mask_00ff); +} + +static force_inline __m128i +invert_colors_1x128 (__m128i data) +{ + return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2)); +} + +static force_inline __m128i +over_1x128 (__m128i src, __m128i alpha, __m128i dst) +{ + return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha))); +} + +static force_inline __m128i +in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst) +{ + return over_1x128 (pix_multiply_1x128 (*src, *mask), + pix_multiply_1x128 (*alpha, *mask), + *dst); +} + +static force_inline __m128i +over_rev_non_pre_1x128 (__m128i src, __m128i dst) +{ + __m128i alpha = expand_alpha_1x128 (src); + + return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src), + _mm_or_si128 (alpha, mask_alpha)), + alpha, + dst); +} + +static force_inline uint32_t +pack_1x128_32 (__m128i data) +{ + return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ())); +} + +static force_inline __m128i +expand565_16_1x128 (uint16_t pixel) +{ + __m128i m = _mm_cvtsi32_si128 (pixel); + + m = unpack_565_to_8888 (m); + + return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ()); +} + +static force_inline uint32_t +core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) +{ + uint8_t a; + __m128i xmms; + + a = src >> 24; + + if (a == 0xff) + { + return src; + } + else if (src) + { + xmms = unpack_32_1x128 (src); + return pack_1x128_32 ( + over_1x128 (xmms, expand_alpha_1x128 (xmms), + unpack_32_1x128 (dst))); + } + + return dst; +} + +static force_inline uint32_t +combine1 (const uint32_t *ps, const uint32_t *pm) +{ + uint32_t s = *ps; + + if (pm) + { + __m128i ms, mm; + + mm = unpack_32_1x128 (*pm); + mm = expand_alpha_1x128 (mm); + + ms = unpack_32_1x128 (s); + ms = pix_multiply_1x128 (ms, mm); + + s = pack_1x128_32 (ms); + } + + return s; +} + +static force_inline __m128i +combine4 (const __m128i *ps, const __m128i *pm) +{ + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_msk_lo, xmm_msk_hi; + __m128i s; + + if (pm) + { + xmm_msk_lo = load_128_unaligned (pm); + + if (is_transparent (xmm_msk_lo)) + return _mm_setzero_si128 (); + } + + s = load_128_unaligned (ps); + + if (pm) + { + unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi); + + expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_msk_lo, &xmm_msk_hi, + &xmm_src_lo, &xmm_src_hi); + + s = pack_2x128_128 (xmm_src_lo, xmm_src_hi); + } + + return s; +} + +static force_inline void +core_combine_over_u_sse2_mask (uint32_t * pd, + const uint32_t* ps, + const uint32_t* pm, + int w) +{ + uint32_t s, d; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps, pm); + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + pm++; + w--; + } + + while (w >= 4) + { + __m128i mask = load_128_unaligned ((__m128i *)pm); + + if (!is_zero (mask)) + { + __m128i src; + __m128i src_hi, src_lo; + __m128i mask_hi, mask_lo; + __m128i alpha_hi, alpha_lo; + + src = load_128_unaligned ((__m128i *)ps); + + if (is_opaque (_mm_and_si128 (src, mask))) + { + save_128_aligned ((__m128i *)pd, src); + } + else + { + __m128i dst = load_128_aligned ((__m128i *)pd); + __m128i dst_hi, dst_lo; + + unpack_128_2x128 (mask, &mask_lo, &mask_hi); + unpack_128_2x128 (src, 
&src_lo, &src_hi); + + expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi); + pix_multiply_2x128 (&src_lo, &src_hi, + &mask_lo, &mask_hi, + &src_lo, &src_hi); + + unpack_128_2x128 (dst, &dst_lo, &dst_hi); + + expand_alpha_2x128 (src_lo, src_hi, + &alpha_lo, &alpha_hi); + + over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, + &dst_lo, &dst_hi); + + save_128_aligned ( + (__m128i *)pd, + pack_2x128_128 (dst_lo, dst_hi)); + } + } + + pm += 4; + ps += 4; + pd += 4; + w -= 4; + } + while (w) + { + d = *pd; + s = combine1 (ps, pm); + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + pm++; + + w--; + } +} + +static force_inline void +core_combine_over_u_sse2_no_mask (uint32_t * pd, + const uint32_t* ps, + int w) +{ + uint32_t s, d; + + /* Align dst on a 16-byte boundary */ + while (w && ((uintptr_t)pd & 15)) + { + d = *pd; + s = *ps; + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + w--; + } + + while (w >= 4) + { + __m128i src; + __m128i src_hi, src_lo, dst_hi, dst_lo; + __m128i alpha_hi, alpha_lo; + + src = load_128_unaligned ((__m128i *)ps); + + if (!is_zero (src)) + { + if (is_opaque (src)) + { + save_128_aligned ((__m128i *)pd, src); + } + else + { + __m128i dst = load_128_aligned ((__m128i *)pd); + + unpack_128_2x128 (src, &src_lo, &src_hi); + unpack_128_2x128 (dst, &dst_lo, &dst_hi); + + expand_alpha_2x128 (src_lo, src_hi, + &alpha_lo, &alpha_hi); + over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, + &dst_lo, &dst_hi); + + save_128_aligned ( + (__m128i *)pd, + pack_2x128_128 (dst_lo, dst_hi)); + } + } + + ps += 4; + pd += 4; + w -= 4; + } + while (w) + { + d = *pd; + s = *ps; + + if (s) + *pd = core_combine_over_u_pixel_sse2 (s, d); + pd++; + ps++; + + w--; + } +} + +static force_inline void +sse2_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + if (pm) + core_combine_over_u_sse2_mask (pd, ps, pm, w); + else + core_combine_over_u_sse2_no_mask (pd, ps, w); +} + +static void +sse2_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + /* Align dst on a 16-byte boundary */ + while (w && + ((uintptr_t)pd & 15)) + { + d = *pd; + s = combine1 (ps, pm); + + *pd++ = core_combine_over_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + /* I'm loading unaligned because I'm not sure + * about the address alignment. 
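Every *_over_u path above computes the premultiplied OVER operator, dest = src + dest·(255 − src_alpha)/255 per channel; the mask_0080/mask_0101 pair in pix_multiply_2x128 is the SIMD form of pixman's rounded division by 255, and _mm_adds_epu8 supplies the saturating add. A scalar reference of the same arithmetic, as a sketch with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* rounded x*a/255, same result as (t + 0x80) * 0x0101 >> 16 */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

static uint32_t
over_scalar (uint32_t src, uint32_t dst)
{
    uint8_t  ia = 255 - (src >> 24);   /* inverse source alpha */
    uint32_t r  = 0;
    int      shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint8_t  s = src >> shift, d = dst >> shift;
        uint32_t c = s + mul_un8 (d, ia);

        if (c > 255)                   /* saturate, like _mm_adds_epu8 */
            c = 255;
        r |= c << shift;
    }
    return r;
}

int
main (void)
{
    /* half-transparent grey over opaque white: 0xffbfbfbf */
    printf ("%08x\n", over_scalar (0x80404040, 0xffffffff));
    return 0;
}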
+ */ + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_src_lo, &xmm_src_hi); + + /* rebuild the 4 pixel data and save */ + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_src_lo, xmm_src_hi)); + + w -= 4; + ps += 4; + pd += 4; + + if (pm) + pm += 4; + } + + while (w) + { + d = *pd; + s = combine1 (ps, pm); + + *pd++ = core_combine_over_u_pixel_sse2 (d, s); + ps++; + w--; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst) +{ + uint32_t maska = src >> 24; + + if (maska == 0) + { + return 0; + } + else if (maska != 0xff) + { + return pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (dst), + expand_alpha_1x128 (unpack_32_1x128 (src)))); + } + + return dst; +} + +static void +sse2_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (d, s); + w--; + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_in_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + while (w && ((uintptr_t)pd & 15)) + { + uint32_t s = combine1 (ps,
pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (s))))); + + if (pm) + pm++; + ps++; + w--; + } + + while (w >= 4) + { + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + if (pm) + pm += 4; + + w -= 4; + } + + while (w) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (s))))); + ps++; + if (pm) + pm++; + w--; + } +} + +static void +sse2_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + while (w && ((uintptr_t)pd & 15)) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), negate_1x128 ( + expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i sa = negate_1x128 (expand_alpha_1x128 (s)); + __m128i da = expand_alpha_1x128 (d); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); +} + +static void +sse2_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, 
&xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i sa = expand_alpha_1x128 (s); + __m128i da = negate_1x128 (expand_alpha_1x128 (d)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); +} + +static void +sse2_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); + ps++; + w--; + if (pm) + pm++; + } +} + +static force_inline uint32_t +core_combine_xor_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d)); + __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s)); +} + +static void +sse2_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int w = width; + uint32_t s, d; + uint32_t* pd = dst; + const uint32_t* ps = src; + const uint32_t* pm = mask; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + + while (w && ((uintptr_t)pd & 15)) + 
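/* Scalar equivalent of what this loop and the 4-pixel one below compute + * per channel (Porter-Duff XOR): xor (s, d) = s * ~alpha (d) + d * ~alpha (s), + * i.e. pix_add_multiply with both expanded alphas negated, as done in + * core_combine_xor_u_pixel_sse2 above. */ +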
{ + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); + xmm_dst = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; + if (pm) + pm += 4; + } + + while (w) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } +} + +static force_inline void +sse2_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int w = width; + uint32_t s, d; + uint32_t* pd = dst; + const uint32_t* ps = src; + const uint32_t* pm = mask; + + while (w && (uintptr_t)pd & 15) + { + s = combine1 (ps, pm); + d = *pd; + + ps++; + if (pm) + pm++; + *pd++ = _mm_cvtsi128_si32 ( + _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + __m128i s; + + s = combine4 ((__m128i*)ps, (__m128i*)pm); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd))); + + pd += 4; + ps += 4; + if (pm) + pm += 4; + w -= 4; + } + + while (w--) + { + s = combine1 (ps, pm); + d = *pd; + + ps++; + *pd++ = _mm_cvtsi128_si32 ( + _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); + if (pm) + pm++; + } +} + +/* SATURATE: when the source alpha is greater than the headroom left by the + * destination alpha (~dst >> 24), scale the source by DIV_UN8 (da, sa) first, + * so the saturating add below cannot overflow the destination. */ +static force_inline uint32_t +core_combine_saturate_u_pixel_sse2 (uint32_t src, + uint32_t dst) +{ + __m128i ms = unpack_32_1x128 (src); + __m128i md = unpack_32_1x128 (dst); + uint32_t sa = src >> 24; + uint32_t da = ~dst >> 24; + + if (sa > da) + { + ms = pix_multiply_1x128 ( + ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24))); + } + + return pack_1x128_32 (_mm_adds_epu16 (md, ms)); +} + +static void +sse2_combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, d; + + uint32_t pack_cmp; + __m128i xmm_src, xmm_dst; + + while (w && (uintptr_t)pd & 15) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + w--; + ps++; + if (pm) + pm++; + } + + while (w >= 4) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); + + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpgt_epi32 ( + _mm_srli_epi32 (xmm_src, 24), + _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24))); + + /* if some src alpha is greater than the respective ~dst alpha */ + if (pack_cmp) + { + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + 
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + if (pm) + pm++; + } + else + { + save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src)); + + pd += 4; + ps += 4; + if (pm) + pm += 4; + } + + w -= 4; + } + + while (w--) + { + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + ps++; + if (pm) + pm++; + } +} + +static void +sse2_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); + w--; + } +} + +static force_inline uint32_t +core_combine_over_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i s = unpack_32_1x128 (src); + __m128i expAlpha = expand_alpha_1x128 (s); + __m128i unpk_mask = unpack_32_1x128 (mask); + __m128i unpk_dst = unpack_32_1x128 (dst); + + return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst)); +} + +static void +sse2_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i d = unpack_32_1x128 (dst); + + return pack_1x128_32 ( + over_1x128 (d, expand_alpha_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (src), + unpack_32_1x128 (mask)))); +} + +static void 
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static void +sse2_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)), + expand_alpha_1x128 (unpack_32_1x128 (d)))); + + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + expand_alpha_1x128 (unpack_32_1x128 (d)))); + + w--; + } +} + +static void +sse2_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s))))); + 
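/* The store above is the scalar form of component-alpha IN_REVERSE: + * per channel, dst = d * (m * alpha (s)); the 4-pixel loop below computes + * the same product two pixels per 128-bit register. */ +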
w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + pix_multiply_1x128 (unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s))))); + w--; + } +} + +static void +sse2_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (m)), + negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); + + w--; + } +} + +static void +sse2_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + negate_1x128 (pix_multiply_1x128 ( + unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s)))))); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, 
&xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (d), + negate_1x128 (pix_multiply_1x128 ( + unpack_32_1x128 (m), + expand_alpha_1x128 (unpack_32_1x128 (s)))))); + w--; + } +} + +static force_inline uint32_t +core_combine_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i m = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + __m128i sa = expand_alpha_1x128 (s); + __m128i da = expand_alpha_1x128 (d); + + s = pix_multiply_1x128 (s, m); + m = negate_1x128 (pix_multiply_1x128 (m, sa)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); +} + +static void +sse2_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i m = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i da = negate_1x128 (expand_alpha_1x128 (d)); + __m128i sa = expand_alpha_1x128 (s); + + s = pix_multiply_1x128 (s, m); + m = pix_multiply_1x128 (m, 
sa); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); +} + +static void +sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static force_inline uint32_t +core_combine_xor_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) +{ + __m128i a = unpack_32_1x128 (mask); + __m128i s = unpack_32_1x128 (src); + __m128i d = unpack_32_1x128 (dst); + + __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 ( + a, expand_alpha_1x128 (s))); + __m128i dest = pix_multiply_1x128 (s, a); + __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d)); + + return pack_1x128_32 (pix_add_multiply_1x128 (&d, + &alpha_dst, + &dest, + &alpha_src)); +} + +static void +sse2_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; + } + + while (w >= 4) + { + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, 
&xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; + } +} + +static void +sse2_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ + uint32_t s, m, d; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + + while (w && (uintptr_t)pd & 15) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 ( + _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), + _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } +} + +static force_inline __m128i +create_mask_16_128 (uint16_t mask) +{ + return _mm_set1_epi16 (mask); +} + +/* Work around a code generation bug in Sun Studio 12. 
*/ +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +# define create_mask_2x32_128(mask0, mask1) \ + (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) +#else +static force_inline __m128i +create_mask_2x32_128 (uint32_t mask0, + uint32_t mask1) +{ + return _mm_set_epi32 (mask0, mask1, mask0, mask1); +} +#endif + +static void +sse2_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst, d; + int32_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + *dst++ = pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + while (w >= 4) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst_lo, &xmm_dst_hi); + + /* rebuild the 4 pixel data and save */ + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + w -= 4; + dst += 4; + } + + while (w) + { + d = *dst; + *dst++ = pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + } +} + +static void +sse2_composite_over_n_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst, d; + int32_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + + *dst++ = pack_565_32_16 ( + pack_1x128_32 (over_1x128 (xmm_src, + xmm_alpha, + expand565_16_1x128 (d)))); + w--; + } + + while (w >= 8) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst0, &xmm_dst1); + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst2, &xmm_dst3); + + xmm_dst = pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + save_128_aligned ((__m128i*)dst, xmm_dst); + + dst += 8; + w -= 8; + } + + while (w--) + { + d = *dst; + *dst++ = pack_565_32_16 ( + pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha, + expand565_16_1x128 (d)))); + } + } + +} + +static void +sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src; + __m128i xmm_dst; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, 
src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + mmx_src = xmm_src; + + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + while (w && (uintptr_t)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); + } + + pd++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp is equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); + } + + pd++; + w--; + } + } + +} + +static void +sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + while (w && (uintptr_t)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + pd++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp is equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + 
&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *pd = pack_1x128_32 ( + in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); + } + + pd++; + w--; + } + } + +} + +static void +sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int32_t w; + int dst_stride, src_stride; + + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); + + xmm_mask = create_mask_16_128 (mask >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t s = *src++; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + uint32_t s = *src++; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + } + +} + +static void +sse2_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + *dst = convert_8888_to_0565 (s); + dst++; + w--; + } + + while (w >= 8) + { + __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0); + __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1); + + save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1)); + + w -= 8; + src += 8; + dst += 8; + } + + while (w) + { + s = *src++; + *dst = 
convert_8888_to_0565 (s); + dst++; + w--; + } + } +} + +static void +sse2_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int32_t w; + int dst_stride, src_stride; + + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + *dst++ = *src++ | 0xff000000; + w--; + } + + while (w >= 16) + { + __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4; + + xmm_src1 = load_128_unaligned ((__m128i*)src + 0); + xmm_src2 = load_128_unaligned ((__m128i*)src + 1); + xmm_src3 = load_128_unaligned ((__m128i*)src + 2); + xmm_src4 = load_128_unaligned ((__m128i*)src + 3); + + save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000)); + save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000)); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *src++ | 0xff000000; + w--; + } + } + +} + +static void +sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int dst_stride, src_stride; + int32_t w; + + __m128i xmm_mask, xmm_alpha; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); + + xmm_mask = create_mask_16_128 (mask >> 24); + xmm_alpha = mask_00ff; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m128i src = unpack_32_1x128 (s); + __m128i alpha = xmm_alpha; + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst++ = pack_1x128_32 ( + in_over_1x128 (&src, &alpha, &mask, &dest)); + + w--; + } + + while (w >= 4) + { + xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff000000); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha, &xmm_alpha, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 4; + src += 4; + w -= 4; + + } + + while (w) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m128i src = unpack_32_1x128 (s); + __m128i alpha = xmm_alpha; + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst++ = pack_1x128_32 ( + in_over_1x128 (&src, &alpha, &mask, &dest)); + + w--; + } + } + +} + +static void +sse2_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + int dst_stride, 
src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + dst = dst_line; + src = src_line; + + while (height--) + { + sse2_combine_over_u (imp, op, dst, src, NULL, width); + + dst += dst_stride; + src += src_stride; + } +} + +static force_inline uint16_t +composite_over_8888_0565pixel (uint32_t src, uint16_t dst) +{ + __m128i ms; + + ms = unpack_32_1x128 (src); + return pack_565_32_16 ( + pack_1x128_32 ( + over_1x128 ( + ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst)))); +} + +static void +sse2_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Align dst on a 16-byte boundary */ + while (w && + ((uintptr_t)dst & 15)) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + w--; + } + + /* It's an 8 pixel loop */ + while (w >= 8) + { + /* I'm loading unaligned because I'm not sure + * about the address alignment. + */ + xmm_src = load_128_unaligned ((__m128i*) src); + xmm_dst = load_128_aligned ((__m128i*) dst); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + /* I'm loading the next 4 pixels from memory + * ahead of time, to optimize the memory read. 
+ */ + xmm_src = load_128_unaligned ((__m128i*) (src + 4)); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst0, &xmm_dst1); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst2, &xmm_dst3); + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + src += 8; + } + + while (w--) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + } + } + +} + +static void +sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m, d; + + __m128i xmm_src, xmm_alpha, xmm_def; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } + + while (w >= 4) + { + m = *((uint32_t*)mask); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_dst = load_128_aligned ((__m128i*) dst); + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x128 (m); + mmx_dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } + } + +} + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +static pixman_bool_t +sse2_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t byte_width; + uint8_t *byte_line; + + __m128i xmm_def; + + if (bpp == 8) + { + uint8_t b; + uint16_t w; + + stride = stride * (int) sizeof (uint32_t) / 1; 
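+ /* pixman strides are in uint32_t units; the line above rescales it to + * byte (8 bpp pixel) units so that the row address computed below lands + * on the right row. The 16 and 32 bpp branches do the same with / 2 + * and / 4. */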
+ byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + byte_width = width; + stride *= 1; + + b = filler & 0xff; + w = (b << 8) | b; + filler = (w << 16) | w; + } + else if (bpp == 16) + { + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; + + filler = (filler & 0xffff) * 0x00010001; + } + else if (bpp == 32) + { + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = 4 * width; + stride *= 4; + } + else + { + return FALSE; + } + + xmm_def = create_mask_2x32_128 (filler, filler); + + while (height--) + { + int w; + uint8_t *d = byte_line; + byte_line += stride; + w = byte_width; + + if (w >= 1 && ((uintptr_t)d & 1)) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + while (w >= 128) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + save_128_aligned ((__m128i*)(d + 64), xmm_def); + save_128_aligned ((__m128i*)(d + 80), xmm_def); + save_128_aligned ((__m128i*)(d + 96), xmm_def); + save_128_aligned ((__m128i*)(d + 112), xmm_def); + + d += 128; + w -= 128; + } + + if (w >= 64) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + + d += 64; + w -= 64; + } + + if (w >= 32) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + + d += 32; + w -= 32; + } + + if (w >= 16) + { + save_128_aligned ((__m128i*)(d), xmm_def); + + d += 16; + w -= 16; + } + + while (w >= 4) + { + *(uint32_t *)d = filler; + + w -= 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = filler; + w -= 2; + d += 2; + } + + if (w >= 1) + { + *(uint8_t *)d = filler; + w -= 1; + d += 1; + } + } + + return TRUE; +} + +static void +sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m; + + __m128i xmm_src, xmm_def; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + if (src == 0) + { + sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, width, height, 0); + return; + } + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x128_32 ( + pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } + + while (w >= 4) + { + m = *((uint32_t*)mask); + + if 
(srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + } + else + { + save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_src, expand_pixel_8_1x128 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } + } + +} + +static void +sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint16_t *dst_line, *dst, d; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t m; + __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = xmm_src; + mmx_alpha = xmm_alpha; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + mmx_dest = expand565_16_1x128 (d); + + *dst = pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } + + while (w >= 8) + { + xmm_dst = load_128_aligned ((__m128i*) dst); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst0, &xmm_dst1); + } + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + } + + while (w) + { + m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + mmx_dest = expand565_16_1x128 (d); + + *dst 
= pack_565_32_16 ( + pack_1x128_32 ( + in_over_1x128 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } + } + +} + +static void +sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + uint32_t opaque, zero; + + __m128i ms; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x128 (s); + + *dst++ = pack_565_32_16 ( + pack_1x128_32 ( + over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); + w--; + } + + while (w >= 8) + { + /* First round */ + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + /* preload next round*/ + xmm_src = load_128_unaligned ((__m128i*)(src + 4)); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst0, &xmm_dst1); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst0, &xmm_dst1); + } + + /* Second round */ + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + src += 8; + dst += 8; + } + + while (w) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x128 (s); + + *dst++ = pack_565_32_16 ( + pack_1x128_32 ( + over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); + w--; + } + } + +} + +static void +sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + uint32_t opaque, zero; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = *src++; + d = *dst; + + *dst++ = pack_1x128_32 ( + over_rev_non_pre_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (d))); + + w--; + } + + while (w >= 4) + { + xmm_src_hi = load_128_unaligned ((__m128i*)src); + + opaque = is_opaque (xmm_src_hi); + zero = is_zero (xmm_src_hi); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 
(xmm_dst_lo, xmm_dst_hi));
+            }
+            else if (!zero)
+            {
+                xmm_dst_hi = load_128_aligned ((__m128i*)dst);
+
+                unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+                over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+                                        &xmm_dst_lo, &xmm_dst_hi);
+
+                save_128_aligned (
+                    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+            }
+
+            w -= 4;
+            dst += 4;
+            src += 4;
+        }
+
+        while (w)
+        {
+            s = *src++;
+            d = *dst;
+
+            *dst++ = pack_1x128_32 (
+                over_rev_non_pre_1x128 (
+                    unpack_32_1x128 (s), unpack_32_1x128 (d)));
+
+            w--;
+        }
+    }
+
+}
+
+static void
+sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
+                                    pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t src;
+    uint16_t *dst_line, *dst, d;
+    uint32_t *mask_line, *mask, m;
+    int dst_stride, mask_stride;
+    int w;
+    uint32_t pack_cmp;
+
+    __m128i xmm_src, xmm_alpha;
+    __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+    __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+    if (src == 0)
+        return;
+
+    PIXMAN_IMAGE_GET_LINE (
+        dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+    xmm_src = expand_pixel_32_1x128 (src);
+    xmm_alpha = expand_alpha_1x128 (xmm_src);
+    mmx_src = xmm_src;
+    mmx_alpha = xmm_alpha;
+
+    while (height--)
+    {
+        w = width;
+        mask = mask_line;
+        dst = dst_line;
+        mask_line += mask_stride;
+        dst_line += dst_stride;
+
+        while (w && ((uintptr_t)dst & 15))
+        {
+            m = *(uint32_t *) mask;
+
+            if (m)
+            {
+                d = *dst;
+                mmx_mask = unpack_32_1x128 (m);
+                mmx_dest = expand565_16_1x128 (d);
+
+                *dst = pack_565_32_16 (
+                    pack_1x128_32 (
+                        in_over_1x128 (
+                            &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+            }
+
+            w--;
+            dst++;
+            mask++;
+        }
+
+        while (w >= 8)
+        {
+            /* First round */
+            xmm_mask = load_128_unaligned ((__m128i*)mask);
+            xmm_dst = load_128_aligned ((__m128i*)dst);
+
+            pack_cmp = _mm_movemask_epi8 (
+                _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+            unpack_565_128_4x128 (xmm_dst,
+                                  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+            /* preload next round */
+            xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
+
+            if (pack_cmp != 0xffff)
+            {
+                in_over_2x128 (&xmm_src, &xmm_src,
+                               &xmm_alpha, &xmm_alpha,
+                               &xmm_mask_lo, &xmm_mask_hi,
+                               &xmm_dst0, &xmm_dst1);
+            }
+
+            /* Second round */
+            pack_cmp = _mm_movemask_epi8 (
+                _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+            if (pack_cmp != 0xffff)
+            {
+                in_over_2x128 (&xmm_src, &xmm_src,
+                               &xmm_alpha, &xmm_alpha,
+                               &xmm_mask_lo, &xmm_mask_hi,
+                               &xmm_dst2, &xmm_dst3);
+            }
+
+            save_128_aligned (
+                (__m128i*)dst, pack_565_4x128_128 (
+                    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+            w -= 8;
+            dst += 8;
+            mask += 8;
+        }
+
+        while (w)
+        {
+            m = *(uint32_t *) mask;
+
+            if (m)
+            {
+                d = *dst;
+                mmx_mask = unpack_32_1x128 (m);
+                mmx_dest = expand565_16_1x128 (d);
+
+                *dst = pack_565_32_16 (
+                    pack_1x128_32 (
+                        in_over_1x128 (
+                            &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+            }
+
+            w--;
+            dst++;
+            mask++;
+        }
+    }
+
+}
+
+static void
+sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
+                         pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint8_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint32_t d, m;
+    uint32_t
src; + int32_t w; + + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 (xmm_alpha, + unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_in_n_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + int dst_stride; + uint32_t d; + uint32_t src; + int32_t w; + + __m128i xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + src = src >> 24; + + if (src == 0xff) + return; + + if (src == 0x00) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + 8, dest_x, dest_y, width, height, src); + + return; + } + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 16; + w -= 16; + } + + while (w) + { + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + xmm_alpha, + unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int src_stride, dst_stride; + int32_t w; + uint32_t s, d; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, 
dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 ( + unpack_32_1x128 (s), unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + src += 16; + dst += 16; + w -= 16; + } + + while (w) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d))); + w--; + } + } + +} + +static void +sse2_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint32_t m, d; + + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + _mm_adds_epu16 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + w--; + } + + while (w >= 16) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x128_32 ( + _mm_adds_epu16 ( + pix_multiply_1x128 ( + xmm_alpha, unpack_32_1x128 (m)), + unpack_32_1x128 (d))); + + w--; + } + } + +} + +static void +sse2_composite_add_n_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + int dst_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + src >>= 24; + + if (src == 0x00) + return; + + if (src == 0xff) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + 
8, dest_x, dest_y, width, height, 0xff); + + return; + } + + src = (src << 24) | (src << 16) | (src << 8) | src; + xmm_src = _mm_set_epi32 (src, src, src, src); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + *dst = (uint8_t)_mm_cvtsi128_si32 ( + _mm_adds_epu8 ( + xmm_src, + _mm_cvtsi32_si128 (*dst))); + + w--; + dst++; + } + + while (w >= 16) + { + save_128_aligned ( + (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 16; + w -= 16; + } + + while (w) + { + *dst = (uint8_t)_mm_cvtsi128_si32 ( + _mm_adds_epu8 ( + xmm_src, + _mm_cvtsi32_si128 (*dst))); + + w--; + dst++; + } + } + +} + +static void +sse2_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + src = src_line; + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Small head */ + while (w && (uintptr_t)dst & 3) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + + sse2_combine_add_u (imp, op, + (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); + + /* Small tail */ + dst += w & 0xfffc; + src += w & 0xfffc; + + w &= 3; + + while (w) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + } + +} + +static void +sse2_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + sse2_combine_add_u (imp, op, dst, src, NULL, width); + } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, src; + int dst_stride; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + + if (src == ~0) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, + dest_x, dest_y, width, height, ~0); + + return; + } + + xmm_src = _mm_set_epi32 (src, src, src, src); + while (height--) + { + int w = width; + uint32_t d; + + dst = dst_line; + dst_line += dst_stride; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + save_128_aligned + ((__m128i*)dst, + _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 4; + w -= 4; + } + + while (w--) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, + _mm_cvtsi32_si128 (d))); + } + } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; 
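+    /* The solid source is expanded to 8x16-bit channels just once, ahead of
+     * the loops; each destination pixel then needs only an a8 mask
+     * expansion, a multiply and a saturating add. */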
+ int dst_stride, mask_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + xmm_src = expand_pixel_32_1x128 (src); + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t m = *(uint32_t*)mask; + if (m) + { + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + __m128i xmm_dst = load_128_aligned ((__m128i*)dst); + __m128i xmm_mask = + _mm_unpacklo_epi8 (unpack_32_1x128(m), + _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + } +} + +static pixman_bool_t +sse2_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; + + if (src_bpp != dst_bpp) + return FALSE; + + if (src_bpp == 16) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else if (src_bpp == 32) + { + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + else + { + return FALSE; + } + + while (height--) + { + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + while (w >= 64) + { + __m128i xmm0, xmm1, xmm2, xmm3; + + xmm0 = load_128_unaligned ((__m128i*)(s)); + xmm1 = load_128_unaligned ((__m128i*)(s + 16)); + xmm2 = load_128_unaligned 
((__m128i*)(s + 32)); + xmm3 = load_128_unaligned ((__m128i*)(s + 48)); + + save_128_aligned ((__m128i*)(d), xmm0); + save_128_aligned ((__m128i*)(d + 16), xmm1); + save_128_aligned ((__m128i*)(d + 32), xmm2); + save_128_aligned ((__m128i*)(d + 48), xmm3); + + s += 64; + d += 64; + w -= 64; + } + + while (w >= 16) + { + save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); + + w -= 16; + d += 16; + s += 16; + } + + while (w >= 4) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + } + + return TRUE; +} + +static void +sse2_composite_copy_area (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + sse2_blt (imp, src_image->bits.bits, + dest_image->bits.bits, + src_image->bits.rowstride, + dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dest_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); +} + +static void +sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + __m128i ms; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + s = 0xff000000 | *src++; + m = (uint32_t) *mask++; + d = *dst; + ms = unpack_32_1x128 (s); + + if (m != 0xff) + { + __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + __m128i md = unpack_32_1x128 (d); + + ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md); + } + + *dst++ = pack_1x128_32 (ms); + w--; + } + + while (w >= 4) + { + m = *(uint32_t*) mask; + xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff000000); + + if (m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 ( + xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + s = 0xff000000 | *src; + + if (m == 0xff) + { + *dst = s; + } + else + { + __m128i ma, md, ms; + + d = *dst; + + ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); + md = unpack_32_1x128 (d); + ms = unpack_32_1x128 (s); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md)); + } + + } + + src++; + dst++; + 
w--; + } + } + +} + +static void +sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t sa; + + s = *src++; + m = (uint32_t) *mask++; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + + while (w >= 4) + { + m = *(uint32_t *) mask; + + if (m) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + uint32_t sa; + + s = *src++; + m = (uint32_t) *mask++; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + } + +} + +static void +sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + uint32_t *dst_line, *dst; + __m128i xmm_src; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_dsta_hi, xmm_dsta_lo; + int dst_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + xmm_src = expand_pixel_32_1x128 (src); + + while (height--) + { + dst = dst_line; + + dst_line += dst_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + __m128i 
vd; + + vd = unpack_32_1x128 (*dst); + + *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), + xmm_src)); + w--; + dst++; + } + + while (w >= 4) + { + __m128i tmp_lo, tmp_hi; + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi); + + tmp_lo = xmm_src; + tmp_hi = xmm_src; + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_dsta_lo, &xmm_dsta_hi, + &tmp_lo, &tmp_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi)); + + w -= 4; + dst += 4; + } + + while (w) + { + __m128i vd; + + vd = unpack_32_1x128 (*dst); + + *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), + xmm_src)); + w--; + dst++; + } + + } + +} + +static void +sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint32_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + int32_t w; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + while (w && (uintptr_t)dst & 15) + { + uint32_t sa; + + s = *src++; + m = (*mask++) >> 24; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + + dst++; + w--; + } + + while (w >= 4) + { + xmm_mask = load_128_unaligned ((__m128i*)mask); + + if (!is_transparent (xmm_mask)) + { + xmm_src = load_128_unaligned ((__m128i*)src); + + if (is_opaque (xmm_mask) && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + uint32_t sa; + + s = *src++; + m = (*mask++) >> 24; + d = *dst; + + sa = s >> 24; + + if (m) + { + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m128i ms, md, ma, msa; + + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (s); + md = unpack_32_1x128 (d); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + 
}
+
+            dst++;
+            w--;
+        }
+    }
+
+}
+
+/* A variant of 'sse2_combine_over_u' with minor tweaks */
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
+                                             const uint32_t* ps,
+                                             int32_t         w,
+                                             pixman_fixed_t  vx,
+                                             pixman_fixed_t  unit_x,
+                                             pixman_fixed_t  src_width_fixed,
+                                             pixman_bool_t   fully_transparent_src)
+{
+    uint32_t s, d;
+    const uint32_t* pm = NULL;
+
+    __m128i xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_src_lo, xmm_src_hi;
+    __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+    if (fully_transparent_src)
+        return;
+
+    /* Align dst on a 16-byte boundary */
+    while (w && ((uintptr_t)pd & 15))
+    {
+        d = *pd;
+        s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+
+        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+        if (pm)
+            pm++;
+        w--;
+    }
+
+    while (w >= 4)
+    {
+        __m128i tmp;
+        uint32_t tmp1, tmp2, tmp3, tmp4;
+
+        tmp1 = *(ps + pixman_fixed_to_int (vx));
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+        tmp2 = *(ps + pixman_fixed_to_int (vx));
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+        tmp3 = *(ps + pixman_fixed_to_int (vx));
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+        tmp4 = *(ps + pixman_fixed_to_int (vx));
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+
+        tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+        xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
+
+        if (is_opaque (xmm_src_hi))
+        {
+            save_128_aligned ((__m128i*)pd, xmm_src_hi);
+        }
+        else if (!is_zero (xmm_src_hi))
+        {
+            xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+            unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+            unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+            expand_alpha_2x128 (
+                xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+
+            over_2x128 (&xmm_src_lo, &xmm_src_hi,
+                        &xmm_alpha_lo, &xmm_alpha_hi,
+                        &xmm_dst_lo, &xmm_dst_hi);
+
+            /* rebuild the 4 pixel data and save */
+            save_128_aligned ((__m128i*)pd,
+                              pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+        }
+
+        w -= 4;
+        pd += 4;
+        if (pm)
+            pm += 4;
+    }
+
+    while (w)
+    {
+        d = *pd;
+        s = combine1 (ps + pixman_fixed_to_int (vx), pm);
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+
+        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+        if (pm)
+            pm++;
+
+        w--;
+    }
+}
+
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
+                       scaled_nearest_scanline_sse2_8888_8888_OVER,
+                       uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
+                       scaled_nearest_scanline_sse2_8888_8888_OVER,
+                       uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
+                       scaled_nearest_scanline_sse2_8888_8888_OVER,
+                       uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,
+                       scaled_nearest_scanline_sse2_8888_8888_OVER,
+                       uint32_t, uint32_t, NORMAL)
+
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
+                                               uint32_t *       dst,
+                                               const uint32_t * src,
+                                               int32_t          w,
+                                               pixman_fixed_t   vx,
+                                               pixman_fixed_t   unit_x,
+                                               pixman_fixed_t   src_width_fixed,
+                                               pixman_bool_t    zero_src)
+{
+    __m128i xmm_mask;
+    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+    if (zero_src || (*mask >> 24) == 0)
+        return;
+
+    xmm_mask = create_mask_16_128 (*mask >> 24);
+
+    while (w && (uintptr_t)dst & 15)
+    {
+        uint32_t s = *(src + pixman_fixed_to_int (vx));
+        vx += unit_x;
+        while (vx >= 0)
+            vx -= src_width_fixed;
+
+        if (s)
+        {
+            uint32_t d = *dst;
+
+            __m128i ms = unpack_32_1x128 (s);
+            __m128i alpha =
expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp2 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp3 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + tmp4 = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + uint32_t s = *(src + pixman_fixed_to_int (vx)); + vx += unit_x; + while (vx >= 0) + vx -= src_width_fixed; + + if (s) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (s); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i mask = xmm_mask; + __m128i dest = unpack_32_1x128 (d); + + *dst = pack_1x128_32 ( + in_over_1x128 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + +} + +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) + +#if BILINEAR_INTERPOLATION_BITS < 8 +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) +#else +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ + -unit_x, -unit_x, -unit_x, -unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \ + -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1)) +#endif + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +do { \ + __m128i xmm_wh, xmm_lo, xmm_hi, a; \ + /* fetch 2x2 pixel block into sse2 registers */ \ + __m128i tltr = 
_mm_loadl_epi64 ( \ + (__m128i *)&src_top[pixman_fixed_to_int (vx)]); \ + __m128i blbr = _mm_loadl_epi64 ( \ + (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \ + vx += unit_x; \ + /* vertical interpolation */ \ + a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \ + xmm_wt), \ + _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \ + xmm_wb)); \ + if (BILINEAR_INTERPOLATION_BITS < 8) \ + { \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + /* horizontal interpolation */ \ + a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ + a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \ + } \ + else \ + { \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + /* horizontal interpolation */ \ + xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ + xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ + a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ + _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + } \ + /* shift and pack the result */ \ + a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \ + a = _mm_packs_epi32 (a, a); \ + a = _mm_packus_epi16 (a, a); \ + pix = _mm_cvtsi128_si32 (a); \ +} while (0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ +} while(0) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + + while ((w -= 4) >= 0) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + *dst++ = pix1; + *dst++ = pix2; + *dst++ = pix3; + *dst++ = pix4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1; + *dst++ = pix2; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1; + } + +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, + scaled_bilinear_scanline_sse2_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; 
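+        /* This head loop runs until dst reaches a 16-byte boundary; the main
+         * loop below then interpolates four pixels per iteration and uses
+         * aligned stores, copying straight through when all four pixels are
+         * opaque and skipping the store when all four are zero. */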
+ dst++; + } + + while (w >= 4) + { + __m128i xmm_src; + __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; + __m128i xmm_alpha_hi, xmm_alpha_lo; + + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + if (!is_zero (xmm_src)) + { + if (is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + __m128i xmm_dst = load_128_aligned ((__m128i *)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); + over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + w -= 4; + dst += 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + uint32_t m; + + while (w && ((uintptr_t)dst & 15)) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + while (w >= 4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + m = *(uint32_t*)mask; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 
(xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + __m128i xmm_mask; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } + + while (w >= 4) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + if (pix1 | pix2 | pix3 | pix4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned + ((__m128i*)dst, pack_2x128_128 
(xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_HAVE_SOLID_MASK) + +static const pixman_fast_path_t sse2_fast_paths[] = +{ + /* PIXMAN_OP_OVER */ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, 
solid, a8b8g8r8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565), + PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + + /* PIXMAN_OP_OVER_REVERSE */ + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888), + + /* PIXMAN_OP_ADD */ + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), + + /* PIXMAN_OP_SRC */ + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, 
r5g6b5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area), + + /* PIXMAN_OP_IN */ + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8), + + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + 
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), + + { PIXMAN_OP_NONE }, +}; + +static uint32_t * +sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + __m128i ff000000 = mask_ff000000; + uint32_t *dst = iter->buffer; + uint32_t *src = (uint32_t *)iter->bits; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + while (w >= 4) + { + save_128_aligned ( + (__m128i *)dst, _mm_or_si128 ( + load_128_unaligned ((__m128i *)src), ff000000)); + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + *dst++ = (*src++) | 0xff000000; + w--; + } + + return iter->buffer; +} + +static uint32_t * +sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint16_t *src = (uint16_t *)iter->bits; + __m128i ff000000 = mask_ff000000; + + iter->bits += iter->stride; + + while (w && ((uintptr_t)dst) & 0x0f) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + while (w >= 8) + { + __m128i lo, hi, s; + + s = _mm_loadu_si128 ((__m128i *)src); + + lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); + hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); + + save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); + save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); + + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + uint16_t s = *src++; + + *dst++ = convert_0565_to_8888 (s); + w--; + } + + return iter->buffer; +} + +static uint32_t * +sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) +{ + int w = iter->width; + uint32_t *dst = iter->buffer; + uint8_t *src = iter->bits; + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6; + + iter->bits += iter->stride; + + while (w && (((uintptr_t)dst) & 15)) + { + *dst++ = *(src++) << 24; + w--; + } + + while (w >= 16) + { + xmm0 = _mm_loadu_si128((__m128i *)src); + + xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0); + xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0); + xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1); + xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1); + xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2); + xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2); + + _mm_store_si128(((__m128i *)(dst + 0)), xmm3); + _mm_store_si128(((__m128i *)(dst + 4)), xmm4); + _mm_store_si128(((__m128i *)(dst + 8)), xmm5); + 
_mm_store_si128(((__m128i *)(dst + 12)), xmm6); + + dst += 16; + src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *(src++) << 24; + w--; + } + + return iter->buffer; +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, + { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, + { PIXMAN_a8, sse2_fetch_a8 }, + { PIXMAN_null } +}; + +static pixman_bool_t +sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif +pixman_implementation_t * +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); + + /* SSE2 constants */ + mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); + mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); + mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); + mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); + mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); + mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); + mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); + mask_0080 = create_mask_16_128 (0x0080); + mask_00ff = create_mask_16_128 (0x00ff); + mask_0101 = create_mask_16_128 (0x0101); + mask_ffff = create_mask_16_128 (0xffff); + mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); + mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); + mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8); + mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004); + + /* Set up function pointers */ + imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; + + imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; + 
imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; + + imp->blt = sse2_blt; + imp->fill = sse2_fill; + + imp->src_iter_init = sse2_src_iter_init; + + return imp; +} diff --git a/programs/develop/libraries/pixman/pixman-trap.c b/programs/develop/libraries/pixman/pixman-trap.c index 8353992c58..91766fdbfc 100644 --- a/programs/develop/libraries/pixman/pixman-trap.c +++ b/programs/develop/libraries/pixman/pixman-trap.c @@ -1,4 +1,5 @@ /* + * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. * Copyright © 2004 Keith Packard * * Permission to use, copy, modify, distribute, and sell this software and its @@ -25,6 +26,7 @@ #endif #include +#include #include "pixman-private.h" /* @@ -137,7 +139,7 @@ _pixman_edge_multi_init (pixman_edge_t * e, if (ne > 0) { int nx = ne / e->dy; - ne -= nx * e->dy; + ne -= nx * (pixman_fixed_48_16_t)e->dy; stepx += nx * e->signdx; } @@ -228,14 +230,13 @@ pixman_line_fixed_edge_init (pixman_edge_t * e, } PIXMAN_EXPORT void -pixman_add_traps (pixman_image_t * image, - int16_t x_off, - int16_t y_off, - int ntrap, - pixman_trap_t * traps) +pixman_add_traps (pixman_image_t * image, + int16_t x_off, + int16_t y_off, + int ntrap, + const pixman_trap_t *traps) { int bpp; - int width; int height; pixman_fixed_t x_off_fixed; @@ -245,7 +246,6 @@ pixman_add_traps (pixman_image_t * image, _pixman_image_validate (image); - width = image->bits.width; height = image->bits.height; bpp = PIXMAN_FORMAT_BPP (image->bits.format); @@ -349,10 +349,8 @@ pixman_rasterize_trapezoid (pixman_image_t * image, int y_off) { int bpp; - int width; int height; - pixman_fixed_t x_off_fixed; pixman_fixed_t y_off_fixed; pixman_edge_t l, r; pixman_fixed_t t, b; @@ -364,11 +362,9 @@ pixman_rasterize_trapezoid (pixman_image_t * image, if (!pixman_trapezoid_valid (trap)) return; - width = image->bits.width; height = image->bits.height; bpp = PIXMAN_FORMAT_BPP (image->bits.format); - x_off_fixed = pixman_int_to_fixed (x_off); y_off_fixed = pixman_int_to_fixed (y_off); t = trap->top + y_off_fixed; @@ -390,3 +386,326 @@ pixman_rasterize_trapezoid (pixman_image_t * image, pixman_rasterize_edges (image, &l, &r, t, b); } } + +static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] = +{ + FALSE, /* Clear 0 0 */ + FALSE, /* Src 1 0 */ + TRUE, /* Dst 0 1 */ + TRUE, /* Over 1 1-Aa */ + TRUE, /* OverReverse 1-Ab 1 */ + FALSE, /* In Ab 0 */ + FALSE, /* InReverse 0 Aa */ + FALSE, /* Out 1-Ab 0 */ + TRUE, /* OutReverse 0 1-Aa */ + TRUE, /* Atop Ab 1-Aa */ + FALSE, /* AtopReverse 1-Ab Aa */ + TRUE, /* Xor 1-Ab 1-Aa */ + TRUE, /* Add 1 1 */ +}; + +static pixman_bool_t +get_trap_extents (pixman_op_t op, pixman_image_t *dest, + const pixman_trapezoid_t *traps, int n_traps, + pixman_box32_t *box) +{ + int i; + + /* When the operator is such that a zero source has an + * effect on the underlying image, we have to + * composite across the entire destination + */ + if (!zero_src_has_no_effect [op]) + { + box->x1 = 0; + box->y1 = 0; + box->x2 = dest->bits.width; + box->y2 = dest->bits.height; + return TRUE; + } + + box->x1 = INT32_MAX; + 
box->y1 = INT32_MAX; + box->x2 = INT32_MIN; + box->y2 = INT32_MIN; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap->top); + if (y1 < box->y1) + box->y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); + if (y2 > box->y2) + box->y2 = y2; + +#define EXTEND_MIN(x) \ + if (pixman_fixed_to_int ((x)) < box->x1) \ + box->x1 = pixman_fixed_to_int ((x)); +#define EXTEND_MAX(x) \ + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \ + box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + +#define EXTEND(x) \ + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap->left.p1.x); + EXTEND(trap->left.p2.x); + EXTEND(trap->right.p1.x); + EXTEND(trap->right.p2.x); + } + + if (box->x1 >= box->x2 || box->y1 >= box->y2) + return FALSE; + + return TRUE; +} + +/* + * pixman_composite_trapezoids() + * + * All the trapezoids are conceptually rendered to an infinitely big image. + * The (0, 0) coordinates of this image are then aligned with the (x, y) + * coordinates of the source image, and then both images are aligned with + * the (x, y) coordinates of the destination. Then these three images are + * composited across the entire destination. + */ +PIXMAN_EXPORT void +pixman_composite_trapezoids (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + const pixman_trapezoid_t * traps) +{ + int i; + + return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A); + + if (n_traps <= 0) + return; + + _pixman_image_validate (src); + _pixman_image_validate (dst); + + if (op == PIXMAN_OP_ADD && + (src->common.flags & FAST_PATH_IS_OPAQUE) && + (mask_format == dst->common.extended_format_code) && + !(dst->common.have_clip_region)) + { + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); + } + } + else + { + pixman_image_t *tmp; + pixman_box32_t box; + int i; + + if (!get_trap_extents (op, dst, traps, n_traps, &box)) + return; + + if (!(tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1))) + return; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + } + + pixman_image_composite (op, src, tmp, dst, + x_src + box.x1, y_src + box.y1, + 0, 0, + x_dst + box.x1, y_dst + box.y1, + box.x2 - box.x1, box.y2 - box.y1); + + pixman_image_unref (tmp); + } +} + +static int +greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) +{ + if (a->y == b->y) + return a->x > b->x; + return a->y > b->y; +} + +/* + * Note that the definition of this function is a bit odd because + * of the X coordinate space (y increasing downwards). 
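+ *
+ * The return expression below is the z component of the cross product
+ * (a - ref) x (b - ref): it is negative for a clockwise turn in
+ * conventional y-up coordinates, which is the turn that appears
+ * counter-clockwise on a y-down screen.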
+ */ +static int +clockwise (const pixman_point_fixed_t *ref, + const pixman_point_fixed_t *a, + const pixman_point_fixed_t *b) +{ + pixman_point_fixed_t ad, bd; + + ad.x = a->x - ref->x; + ad.y = a->y - ref->y; + bd.x = b->x - ref->x; + bd.y = b->y - ref->y; + + return ((pixman_fixed_32_32_t) bd.y * ad.x - + (pixman_fixed_32_32_t) ad.y * bd.x) < 0; +} + +static void +triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) +{ + const pixman_point_fixed_t *top, *left, *right, *tmp; + + top = &tri->p1; + left = &tri->p2; + right = &tri->p3; + + if (greater_y (top, left)) + { + tmp = left; + left = top; + top = tmp; + } + + if (greater_y (top, right)) + { + tmp = right; + right = top; + top = tmp; + } + + if (clockwise (top, right, left)) + { + tmp = right; + right = left; + left = tmp; + } + + /* + * Two cases: + * + * + + + * / \ / \ + * / \ / \ + * / + + \ + * / -- -- \ + * / -- -- \ + * / --- --- \ + * +-- --+ + */ + + traps->top = top->y; + traps->left.p1 = *top; + traps->left.p2 = *left; + traps->right.p1 = *top; + traps->right.p2 = *right; + + if (right->y < left->y) + traps->bottom = right->y; + else + traps->bottom = left->y; + + traps++; + + *traps = *(traps - 1); + + if (right->y < left->y) + { + traps->top = right->y; + traps->bottom = left->y; + traps->right.p1 = *right; + traps->right.p2 = *left; + } + else + { + traps->top = left->y; + traps->bottom = right->y; + traps->left.p1 = *left; + traps->left.p2 = *right; + } +} + +static pixman_trapezoid_t * +convert_triangles (int n_tris, const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + int i; + + if (n_tris <= 0) + return NULL; + + traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); + if (!traps) + return NULL; + + for (i = 0; i < n_tris; ++i) + triangle_to_trapezoids (&(tris[i]), traps + 2 * i); + + return traps; +} + +PIXMAN_EXPORT void +pixman_composite_triangles (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_composite_trapezoids (op, src, dst, mask_format, + x_src, y_src, x_dst, y_dst, + n_tris * 2, traps); + + free (traps); + } +} + +PIXMAN_EXPORT void +pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_add_trapezoids (image, x_off, y_off, + n_tris * 2, traps); + + free (traps); + } +} diff --git a/programs/develop/libraries/pixman/pixman-utils.c b/programs/develop/libraries/pixman/pixman-utils.c index 3ef88b7532..f31171f6d7 100644 --- a/programs/develop/libraries/pixman/pixman-utils.c +++ b/programs/develop/libraries/pixman/pixman-utils.c @@ -31,15 +31,19 @@ #include "pixman-private.h" pixman_bool_t -pixman_multiply_overflows_int (unsigned int a, - unsigned int b) +_pixman_multiply_overflows_size (size_t a, size_t b) +{ + return a >= SIZE_MAX / b; +} + +pixman_bool_t +_pixman_multiply_overflows_int (unsigned int a, unsigned int b) { return a >= INT32_MAX / b; } pixman_bool_t -pixman_addition_overflows_int (unsigned int a, - unsigned int b) +_pixman_addition_overflows_int (unsigned int a, unsigned int b) { return a > INT32_MAX - b; } @@ -67,61 +71,96 @@ pixman_malloc_abc (unsigned int a, return malloc (a * b * c); } -/* - * Helper routine to expand a color 
component from 0 < n <= 8 bits to 16 - * bits by replication. - */ -static inline uint64_t -expand16 (const uint8_t val, int nbits) +static force_inline uint16_t +float_to_unorm (float f, int n_bits) { - /* Start out with the high bit of val in the high bit of result. */ - uint16_t result = (uint16_t)val << (16 - nbits); + uint32_t u; - if (nbits == 0) - return 0; + if (f > 1.0) + f = 1.0; + if (f < 0.0) + f = 0.0; - /* Copy the bits in result, doubling the number of bits each time, until - * we fill all 16 bits. - */ - while (nbits < 16) - { - result |= result >> nbits; - nbits *= 2; - } + u = f * (1 << n_bits); + u -= (u >> n_bits); - return result; + return u; +} + +static force_inline float +unorm_to_float (uint16_t u, int n_bits) +{ + uint32_t m = ((1 << n_bits) - 1); + + return (u & m) * (1.f / (float)m); } /* - * This function expands images from ARGB8 format to ARGB16. To preserve - * precision, it needs to know the original source format. For example, if the - * source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then - * the expanded value is 12345123. To correctly expand this to 16 bits, it - * should be 1234512345123451 and not 1234512312345123. + * This function expands images from a8r8g8b8 to argb_t. To preserve + * precision, it needs to know from which source format the a8r8g8b8 pixels + * originally came. + * + * For example, if the source was PIXMAN_x1r5g5b5 and the red component + * contained bits 12345, then the 8-bit value is 12345123. To correctly + * expand this to floating point, it should be 12345 / 31.0 and not + * 12345123 / 255.0. */ void -pixman_expand (uint64_t * dst, - const uint32_t * src, - pixman_format_code_t format, - int width) +pixman_expand_to_float (argb_t *dst, + const uint32_t *src, + pixman_format_code_t format, + int width) { + static const float multipliers[16] = { + 0.0f, + 1.0f / ((1 << 1) - 1), + 1.0f / ((1 << 2) - 1), + 1.0f / ((1 << 3) - 1), + 1.0f / ((1 << 4) - 1), + 1.0f / ((1 << 5) - 1), + 1.0f / ((1 << 6) - 1), + 1.0f / ((1 << 7) - 1), + 1.0f / ((1 << 8) - 1), + 1.0f / ((1 << 9) - 1), + 1.0f / ((1 << 10) - 1), + 1.0f / ((1 << 11) - 1), + 1.0f / ((1 << 12) - 1), + 1.0f / ((1 << 13) - 1), + 1.0f / ((1 << 14) - 1), + 1.0f / ((1 << 15) - 1), + }; + int a_size, r_size, g_size, b_size; + int a_shift, r_shift, g_shift, b_shift; + float a_mul, r_mul, g_mul, b_mul; + uint32_t a_mask, r_mask, g_mask, b_mask; + int i; + + if (!PIXMAN_FORMAT_VIS (format)) + format = PIXMAN_a8r8g8b8; + /* * Determine the sizes of each component and the masks and shifts * required to extract them from the source pixel. 
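+ *
+ * As a worked example: for PIXMAN_x1r5g5b5, r_size is 5, so
+ * r_shift = 24 - 5 = 19 and r_mask = 0x1f.  (pixel >> 19) & 0x1f then
+ * recovers the original five red bits from the replicated 8-bit
+ * channel, and r_mul = 1.0f / 31.0f maps them into [0.0, 1.0].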
*/ - const int a_size = PIXMAN_FORMAT_A (format), - r_size = PIXMAN_FORMAT_R (format), - g_size = PIXMAN_FORMAT_G (format), - b_size = PIXMAN_FORMAT_B (format); - const int a_shift = 32 - a_size, - r_shift = 24 - r_size, - g_shift = 16 - g_size, - b_shift = 8 - b_size; - const uint8_t a_mask = ~(~0 << a_size), - r_mask = ~(~0 << r_size), - g_mask = ~(~0 << g_size), - b_mask = ~(~0 << b_size); - int i; + a_size = PIXMAN_FORMAT_A (format); + r_size = PIXMAN_FORMAT_R (format); + g_size = PIXMAN_FORMAT_G (format); + b_size = PIXMAN_FORMAT_B (format); + + a_shift = 32 - a_size; + r_shift = 24 - r_size; + g_shift = 16 - g_size; + b_shift = 8 - b_size; + + a_mask = ((1 << a_size) - 1); + r_mask = ((1 << r_size) - 1); + g_mask = ((1 << g_size) - 1); + b_mask = ((1 << b_size) - 1); + + a_mul = multipliers[a_size]; + r_mul = multipliers[r_size]; + g_mul = multipliers[g_size]; + b_mul = multipliers[b_size]; /* Start at the end so that we can do the expansion in place * when src == dst @@ -129,44 +168,52 @@ pixman_expand (uint64_t * dst, for (i = width - 1; i >= 0; i--) { const uint32_t pixel = src[i]; - const uint8_t a = (pixel >> a_shift) & a_mask, - r = (pixel >> r_shift) & r_mask, - g = (pixel >> g_shift) & g_mask, - b = (pixel >> b_shift) & b_mask; - const uint64_t a16 = a_size ? expand16 (a, a_size) : 0xffff, - r16 = expand16 (r, r_size), - g16 = expand16 (g, g_size), - b16 = expand16 (b, b_size); - dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16; + dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f; + dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul; + dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul; + dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul; } } -/* - * Contracting is easier than expanding. We just need to truncate the - * components. - */ +uint16_t +pixman_float_to_unorm (float f, int n_bits) +{ + return float_to_unorm (f, n_bits); +} + +float +pixman_unorm_to_float (uint16_t u, int n_bits) +{ + return unorm_to_float (u, n_bits); +} + void -pixman_contract (uint32_t * dst, - const uint64_t *src, - int width) +pixman_contract_from_float (uint32_t *dst, + const argb_t *src, + int width) { int i; - /* Start at the beginning so that we can do the contraction in - * place when src == dst - */ - for (i = 0; i < width; i++) + for (i = 0; i < width; ++i) { - const uint8_t a = src[i] >> 56, - r = src[i] >> 40, - g = src[i] >> 24, - b = src[i] >> 8; + uint8_t a, r, g, b; - dst[i] = a << 24 | r << 16 | g << 8 | b; + a = float_to_unorm (src[i].a, 8); + r = float_to_unorm (src[i].r, 8); + g = float_to_unorm (src[i].g, 8); + b = float_to_unorm (src[i].b, 8); + + dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0); } } +uint32_t * +_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + return iter->buffer; +} + #define N_TMP_BOXES (16) pixman_bool_t @@ -236,7 +283,14 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst, return retval; } -#ifdef DEBUG +/* This function is exported for the sake of the test suite and not part + * of the ABI. 
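+ * It simply returns the result of get_implementation (), so the tests
+ * can walk and exercise the delegate chain of implementations.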
+ */ +PIXMAN_EXPORT pixman_implementation_t * +_pixman_internal_only_get_implementation (void) +{ + return get_implementation (); +} void _pixman_log_error (const char *function, const char *message) @@ -254,5 +308,3 @@ _pixman_log_error (const char *function, const char *message) n_messages++; } } - -#endif diff --git a/programs/develop/libraries/pixman/pixman-version.h b/programs/develop/libraries/pixman/pixman-version.h index 172beea370..404227f984 100644 --- a/programs/develop/libraries/pixman/pixman-version.h +++ b/programs/develop/libraries/pixman/pixman-version.h @@ -32,10 +32,10 @@ #endif #define PIXMAN_VERSION_MAJOR 0 -#define PIXMAN_VERSION_MINOR 20 +#define PIXMAN_VERSION_MINOR 30 #define PIXMAN_VERSION_MICRO 2 -#define PIXMAN_VERSION_STRING "0.20.2" +#define PIXMAN_VERSION_STRING "0.30.2" #define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ ((major) * 10000) \ diff --git a/programs/develop/libraries/pixman/pixman-x64-mmx-emulation.h b/programs/develop/libraries/pixman/pixman-x64-mmx-emulation.h deleted file mode 100644 index 378019cf27..0000000000 --- a/programs/develop/libraries/pixman/pixman-x64-mmx-emulation.h +++ /dev/null @@ -1,263 +0,0 @@ -#ifndef MMX_X64_H_INCLUDED -#define MMX_X64_H_INCLUDED - -/* Implementation of x64 MMX substitition functions, before - * pixman is reimplemented not to use __m64 type on Visual C++ - * - * Copyright (C)2009 by George Yohng - * Released in public domain. - */ - -#include - -#define M64C(a) (*(const __m64 *)(&a)) -#define M64U(a) (*(const unsigned long long *)(&a)) - -__inline __m64 -_m_from_int (int a) -{ - long long i64 = a; - - return M64C (i64); -} - -__inline __m64 -_mm_setzero_si64 () -{ - long long i64 = 0; - - return M64C (i64); -} - -__inline __m64 -_mm_set_pi32 (int i1, int i0) -{ - unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32); - - return M64C (i64); -} - -__inline void -_m_empty () -{ -} - -__inline __m64 -_mm_set1_pi16 (short w) -{ - unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL; - - return M64C (i64); -} - -__inline int -_m_to_int (__m64 m) -{ - return m.m64_i32[0]; -} - -__inline __m64 -_mm_movepi64_pi64 (__m128i a) -{ - return M64C (a.m128i_i64[0]); -} - -__inline __m64 -_m_pand (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) & M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_por (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) | M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_pxor (__m64 a, __m64 b) -{ - unsigned long long i64 = M64U (a) ^ M64U (b); - - return M64C (i64); -} - -__inline __m64 -_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned short d[4] = - { - (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16), - (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16) - }; - - return M64C (d[0]); -} - -__inline __m64 -_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned short d[4] = - { - (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])), - (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])), - (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])), - (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3])) - }; - - return M64C (d[0]); -} - -__inline __m64 -_m_pmullw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = - ((unsigned long long)(unsigned 
short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) + - (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48); - - return M64C (x); -} - -__inline __m64 -_m_paddusb (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) + - (M64U (b) & 0x00FF00FF00FF00FFULL); - - unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) + - ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL); - - x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; - y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; - - x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8); - - return M64C (x); -} - -__inline __m64 -_m_paddusw (__m64 a, __m64 b) /* unoptimized */ -{ - unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) + - (M64U (b) & 0x0000FFFF0000FFFFULL); - - unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) + - ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL); - - x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; - y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; - - x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16); - - return M64C (x); -} - -__inline __m64 -_m_pshufw (__m64 a, int n) /* unoptimized */ -{ - unsigned short d[4] = - { - a.m64_u16[n & 3], - a.m64_u16[(n >> 2) & 3], - a.m64_u16[(n >> 4) & 3], - a.m64_u16[(n >> 6) & 3] - }; - - return M64C (d[0]); -} - -__inline unsigned char -sat16 (unsigned short d) -{ - if (d > 0xFF) return 0xFF; - else return d & 0xFF; -} - -__inline __m64 -_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - sat16 (m1.m64_u16[0]), - sat16 (m1.m64_u16[1]), - sat16 (m1.m64_u16[2]), - sat16 (m1.m64_u16[3]), - sat16 (m2.m64_u16[0]), - sat16 (m2.m64_u16[1]), - sat16 (m2.m64_u16[2]), - sat16 (m2.m64_u16[3]) - }; - - return M64C (d[0]); -} - -__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - m1.m64_u8[0], - m2.m64_u8[0], - m1.m64_u8[1], - m2.m64_u8[1], - m1.m64_u8[2], - m2.m64_u8[2], - m1.m64_u8[3], - m2.m64_u8[3], - }; - - return M64C (d[0]); -} - -__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */ -{ - unsigned char d[8] = - { - m1.m64_u8[4], - m2.m64_u8[4], - m1.m64_u8[5], - m2.m64_u8[5], - m1.m64_u8[6], - m2.m64_u8[6], - m1.m64_u8[7], - m2.m64_u8[7], - }; - - return M64C (d[0]); -} - -__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */ -{ - unsigned short d[4] = - { - a.m64_u16[0] >> n, - a.m64_u16[1] >> n, - a.m64_u16[2] >> n, - a.m64_u16[3] >> n - }; - - return M64C (d[0]); -} - -__inline __m64 _m_psrlqi (__m64 m, int n) -{ - unsigned long long x = M64U (m) >> n; - - return M64C (x); -} - -__inline __m64 _m_psllqi (__m64 m, int n) -{ - unsigned long long x = M64U (m) << n; - - return M64C (x); -} - -#endif /* MMX_X64_H_INCLUDED */ diff --git a/programs/develop/libraries/pixman/pixman-x86.c b/programs/develop/libraries/pixman/pixman-x86.c new file mode 100644 index 0000000000..57e4d1f351 --- /dev/null +++ b/programs/develop/libraries/pixman/pixman-x86.c @@ -0,0 +1,237 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. 
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  SuSE makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+#if defined(USE_X86_MMX) || defined (USE_SSE2)
+
+/* The CPU detection code needs to be in a file not compiled with
+ * "-mmmx -msse", as gcc would otherwise generate CMOV instructions
+ * that would raise SIGILL on old CPUs that don't support them.
+ */
+
+typedef enum
+{
+    X86_MMX            = (1 << 0),
+    X86_MMX_EXTENSIONS = (1 << 1),
+    X86_SSE            = (1 << 2) | X86_MMX_EXTENSIONS,
+    X86_SSE2           = (1 << 3),
+    X86_CMOV           = (1 << 4)
+} cpu_features_t;
+
+#ifdef HAVE_GETISAX
+
+#include <sys/auxv.h>
+
+static cpu_features_t
+detect_cpu_features (void)
+{
+    cpu_features_t features = 0;
+    unsigned int result = 0;
+
+    if (getisax (&result, 1))
+    {
+        if (result & AV_386_CMOV)
+            features |= X86_CMOV;
+        if (result & AV_386_MMX)
+            features |= X86_MMX;
+        if (result & AV_386_AMD_MMX)
+            features |= X86_MMX_EXTENSIONS;
+        if (result & AV_386_SSE)
+            features |= X86_SSE;
+        if (result & AV_386_SSE2)
+            features |= X86_SSE2;
+    }
+
+    return features;
+}
+
+#else
+
+#define _PIXMAN_X86_64 \
+    (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
+
+static pixman_bool_t
+have_cpuid (void)
+{
+#if _PIXMAN_X86_64 || defined (_MSC_VER)
+
+    return TRUE;
+
+#elif defined (__GNUC__)
+    uint32_t result;
+
+    __asm__ volatile (
+        "pushf"                  "\n\t"
+        "pop %%eax"              "\n\t"
+        "mov %%eax, %%ecx"       "\n\t"
+        "xor $0x00200000, %%eax" "\n\t"
+        "push %%eax"             "\n\t"
+        "popf"                   "\n\t"
+        "pushf"                  "\n\t"
+        "pop %%eax"              "\n\t"
+        "xor %%ecx, %%eax"       "\n\t"
+        "mov %%eax, %0"          "\n\t"
+        : "=r" (result)
+        :
+        : "%eax", "%ecx");
+
+    return !!result;
+
+#else
+#error "Unknown compiler"
+#endif
+}
+
+static void
+pixman_cpuid (uint32_t feature,
+              uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
+#if defined (__GNUC__)
+
+#if _PIXMAN_X86_64
+    __asm__ volatile (
+        "cpuid" "\n\t"
+        : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
+        : "a" (feature));
+#else
+    /* On x86-32 we need to be careful about the handling of %ebx
+     * and %esp.  We can't declare either one as clobbered
+     * since they are special registers (%ebx is the "PIC
+     * register" holding an offset to global data, %esp the
+     * stack pointer), so we need to make sure that %ebx is
+     * preserved, and that %esp has its original value when
+     * accessing the output operands.
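+     *
+     * The asm statement below therefore lets the compiler pick a
+     * spare general register to hold %ebx's value and swaps it in
+     * and out around the cpuid with xchg.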
+ */ + __asm__ volatile ( + "xchg %%ebx, %1" "\n\t" + "cpuid" "\n\t" + "xchg %%ebx, %1" "\n\t" + : "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d) + : "a" (feature)); +#endif + +#elif defined (_MSC_VER) + int info[4]; + + __cpuid (info, feature); + + *a = info[0]; + *b = info[1]; + *c = info[2]; + *d = info[3]; +#else +#error Unknown compiler +#endif +} + +static cpu_features_t +detect_cpu_features (void) +{ + uint32_t a, b, c, d; + cpu_features_t features = 0; + + if (!have_cpuid()) + return features; + + /* Get feature bits */ + pixman_cpuid (0x01, &a, &b, &c, &d); + if (d & (1 << 15)) + features |= X86_CMOV; + if (d & (1 << 23)) + features |= X86_MMX; + if (d & (1 << 25)) + features |= X86_SSE; + if (d & (1 << 26)) + features |= X86_SSE2; + + /* Check for AMD specific features */ + if ((features & X86_MMX) && !(features & X86_SSE)) + { + char vendor[13]; + + /* Get vendor string */ + memset (vendor, 0, sizeof vendor); + + pixman_cpuid (0x00, &a, &b, &c, &d); + memcpy (vendor + 0, &b, 4); + memcpy (vendor + 4, &d, 4); + memcpy (vendor + 8, &c, 4); + + if (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0) + { + pixman_cpuid (0x80000000, &a, &b, &c, &d); + if (a >= 0x80000001) + { + pixman_cpuid (0x80000001, &a, &b, &c, &d); + + if (d & (1 << 22)) + features |= X86_MMX_EXTENSIONS; + } + } + } + + return features; +} + +#endif + +static pixman_bool_t +have_feature (cpu_features_t feature) +{ + static pixman_bool_t initialized; + static cpu_features_t features; + + if (!initialized) + { + features = detect_cpu_features(); + initialized = TRUE; + } + + return (features & feature) == feature; +} + +#endif + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp) +{ +#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) +#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) + +#ifdef USE_X86_MMX + if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_SSE2 + if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS)) + imp = _pixman_implementation_create_sse2 (imp); +#endif + + return imp; +} diff --git a/programs/develop/libraries/pixman/pixman.c b/programs/develop/libraries/pixman/pixman.c index 3a62b2d7df..184f0c4e6a 100644 --- a/programs/develop/libraries/pixman/pixman.c +++ b/programs/develop/libraries/pixman/pixman.c @@ -30,16 +30,15 @@ #include -static force_inline pixman_implementation_t * -get_implementation (void) +pixman_implementation_t *global_implementation; + +#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR +static void __attribute__((constructor)) +pixman_constructor (void) { - static pixman_implementation_t *global_implementation; - - if (!global_implementation) - global_implementation = _pixman_choose_implementation (); - - return global_implementation; + global_implementation = _pixman_choose_implementation (); } +#endif typedef struct operator_info_t operator_info_t; @@ -153,57 +152,6 @@ optimize_operator (pixman_op_t op, return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque]; } -static void -apply_workaround (pixman_image_t *image, - int32_t * x, - int32_t * y, - uint32_t ** save_bits, - int * save_dx, - int * save_dy) -{ - if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND)) - { - /* Some X servers generate images that point to the - * wrong place in memory, but then set the clip region - * to point to the right place. Because of an old bug - * in pixman, this would actually work. 
- * - * Here we try and undo the damage - */ - int bpp = PIXMAN_FORMAT_BPP (image->bits.format) / 8; - pixman_box32_t *extents; - uint8_t *t; - int dx, dy; - - extents = pixman_region32_extents (&(image->common.clip_region)); - dx = extents->x1; - dy = extents->y1; - - *save_bits = image->bits.bits; - - *x -= dx; - *y -= dy; - pixman_region32_translate (&(image->common.clip_region), -dx, -dy); - - t = (uint8_t *)image->bits.bits; - t += dy * image->bits.rowstride * 4 + dx * bpp; - image->bits.bits = (uint32_t *)t; - - *save_dx = dx; - *save_dy = dy; - } -} - -static void -unapply_workaround (pixman_image_t *image, uint32_t *bits, int dx, int dy) -{ - if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND)) - { - image->bits.bits = bits; - pixman_region32_translate (&image->common.clip_region, dx, dy); - } -} - /* * Computing composite region */ @@ -276,19 +224,19 @@ clip_source_image (pixman_region32_t * region, * returns FALSE if the final region is empty. Indistinguishable from * an allocation failure, but rendering ignores those anyways. */ -static pixman_bool_t -pixman_compute_composite_region32 (pixman_region32_t * region, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +pixman_bool_t +_pixman_compute_composite_region32 (pixman_region32_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { region->extents.x1 = dest_x; region->extents.x2 = dest_x + width; @@ -297,8 +245,8 @@ pixman_compute_composite_region32 (pixman_region32_t * region, region->extents.x1 = MAX (region->extents.x1, 0); region->extents.y1 = MAX (region->extents.y1, 0); - region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width); - region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height); + region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width); + region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height); region->data = 0; @@ -313,29 +261,29 @@ pixman_compute_composite_region32 (pixman_region32_t * region, return FALSE; } - if (dst_image->common.have_clip_region) + if (dest_image->common.have_clip_region) { - if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0)) + if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0)) return FALSE; } - if (dst_image->common.alpha_map) + if (dest_image->common.alpha_map) { if (!pixman_region32_intersect_rect (region, region, - dst_image->common.alpha_origin_x, - dst_image->common.alpha_origin_y, - dst_image->common.alpha_map->width, - dst_image->common.alpha_map->height)) + dest_image->common.alpha_origin_x, + dest_image->common.alpha_origin_y, + dest_image->common.alpha_map->width, + dest_image->common.alpha_map->height)) { return FALSE; } if (!pixman_region32_not_empty (region)) return FALSE; - if (dst_image->common.alpha_map->common.have_clip_region) + if (dest_image->common.alpha_map->common.have_clip_region) { - if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region, - -dst_image->common.alpha_origin_x, - -dst_image->common.alpha_origin_y)) + if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region, + -dest_image->common.alpha_origin_x, + 
-dest_image->common.alpha_origin_y)) { return FALSE; } @@ -377,220 +325,89 @@ pixman_compute_composite_region32 (pixman_region32_t * region, return TRUE; } -#define N_CACHED_FAST_PATHS 8 - typedef struct { - struct - { - pixman_implementation_t * imp; - pixman_fast_path_t fast_path; - } cache [N_CACHED_FAST_PATHS]; -} cache_t; + pixman_fixed_48_16_t x1; + pixman_fixed_48_16_t y1; + pixman_fixed_48_16_t x2; + pixman_fixed_48_16_t y2; +} box_48_16_t; -PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); - -static force_inline pixman_bool_t -lookup_composite_function (pixman_op_t op, - pixman_format_code_t src_format, - uint32_t src_flags, - pixman_format_code_t mask_format, - uint32_t mask_flags, - pixman_format_code_t dest_format, - uint32_t dest_flags, - pixman_implementation_t **out_imp, - pixman_composite_func_t *out_func) +static pixman_bool_t +compute_transformed_extents (pixman_transform_t *transform, + const pixman_box32_t *extents, + box_48_16_t *transformed) { - pixman_implementation_t *imp; - cache_t *cache; + pixman_fixed_48_16_t tx1, ty1, tx2, ty2; + pixman_fixed_t x1, y1, x2, y2; int i; - /* Check cache for fast paths */ - cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); + x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2; + y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2; + x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2; + y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2; - for (i = 0; i < N_CACHED_FAST_PATHS; ++i) + if (!transform) { - const pixman_fast_path_t *info = &(cache->cache[i].fast_path); + transformed->x1 = x1; + transformed->y1 = y1; + transformed->x2 = x2; + transformed->y2 = y2; - /* Note that we check for equality here, not whether - * the cached fast path matches. This is to prevent - * us from selecting an overly general fast path - * when a more specific one would work. - */ - if (info->op == op && - info->src_format == src_format && - info->mask_format == mask_format && - info->dest_format == dest_format && - info->src_flags == src_flags && - info->mask_flags == mask_flags && - info->dest_flags == dest_flags && - info->func) - { - *out_imp = cache->cache[i].imp; - *out_func = cache->cache[i].fast_path.func; - - goto update_cache; - } + return TRUE; } - for (imp = get_implementation (); imp != NULL; imp = imp->delegate) + tx1 = ty1 = INT64_MAX; + tx2 = ty2 = INT64_MIN; + + for (i = 0; i < 4; ++i) { - const pixman_fast_path_t *info = imp->fast_paths; + pixman_fixed_48_16_t tx, ty; + pixman_vector_t v; - while (info->op != PIXMAN_OP_NONE) - { - if ((info->op == op || info->op == PIXMAN_OP_any) && - /* Formats */ - ((info->src_format == src_format) || - (info->src_format == PIXMAN_any)) && - ((info->mask_format == mask_format) || - (info->mask_format == PIXMAN_any)) && - ((info->dest_format == dest_format) || - (info->dest_format == PIXMAN_any)) && - /* Flags */ - (info->src_flags & src_flags) == info->src_flags && - (info->mask_flags & mask_flags) == info->mask_flags && - (info->dest_flags & dest_flags) == info->dest_flags) - { - *out_imp = imp; - *out_func = info->func; + v.vector[0] = (i & 0x01)? x1 : x2; + v.vector[1] = (i & 0x02)? 
y1 : y2; + v.vector[2] = pixman_fixed_1; - /* Set i to the last spot in the cache so that the - * move-to-front code below will work - */ - i = N_CACHED_FAST_PATHS - 1; + if (!pixman_transform_point (transform, &v)) + return FALSE; - goto update_cache; - } + tx = (pixman_fixed_48_16_t)v.vector[0]; + ty = (pixman_fixed_48_16_t)v.vector[1]; - ++info; - } + if (tx < tx1) + tx1 = tx; + if (ty < ty1) + ty1 = ty; + if (tx > tx2) + tx2 = tx; + if (ty > ty2) + ty2 = ty; } - return FALSE; -update_cache: - if (i) - { - while (i--) - cache->cache[i + 1] = cache->cache[i]; - - cache->cache[0].imp = *out_imp; - cache->cache[0].fast_path.op = op; - cache->cache[0].fast_path.src_format = src_format; - cache->cache[0].fast_path.src_flags = src_flags; - cache->cache[0].fast_path.mask_format = mask_format; - cache->cache[0].fast_path.mask_flags = mask_flags; - cache->cache[0].fast_path.dest_format = dest_format; - cache->cache[0].fast_path.dest_flags = dest_flags; - cache->cache[0].fast_path.func = *out_func; - } + transformed->x1 = tx1; + transformed->y1 = ty1; + transformed->x2 = tx2; + transformed->y2 = ty2; return TRUE; } -static pixman_bool_t -compute_sample_extents (pixman_transform_t *transform, - pixman_box32_t *extents, int x, int y, - pixman_fixed_t x_off, pixman_fixed_t y_off, - pixman_fixed_t width, pixman_fixed_t height) -{ - pixman_fixed_t x1, y1, x2, y2; - pixman_fixed_48_16_t tx1, ty1, tx2, ty2; - - /* We have checked earlier that (extents->x1 - x) etc. fit in a pixman_fixed_t */ - x1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x1 - x) + pixman_fixed_1 / 2; - y1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y1 - y) + pixman_fixed_1 / 2; - x2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x2 - x) - pixman_fixed_1 / 2; - y2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y2 - y) - pixman_fixed_1 / 2; - - if (!transform) - { - tx1 = (pixman_fixed_48_16_t)x1; - ty1 = (pixman_fixed_48_16_t)y1; - tx2 = (pixman_fixed_48_16_t)x2; - ty2 = (pixman_fixed_48_16_t)y2; - } - else - { - int i; - - /* Silence GCC */ - tx1 = ty1 = tx2 = ty2 = 0; - - for (i = 0; i < 4; ++i) - { - pixman_fixed_48_16_t tx, ty; - pixman_vector_t v; - - v.vector[0] = (i & 0x01)? x1 : x2; - v.vector[1] = (i & 0x02)? y1 : y2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point (transform, &v)) - return FALSE; - - tx = (pixman_fixed_48_16_t)v.vector[0]; - ty = (pixman_fixed_48_16_t)v.vector[1]; - - if (i == 0) - { - tx1 = tx; - ty1 = ty; - tx2 = tx; - ty2 = ty; - } - else - { - if (tx < tx1) - tx1 = tx; - if (ty < ty1) - ty1 = ty; - if (tx > tx2) - tx2 = tx; - if (ty > ty2) - ty2 = ty; - } - } - } - - /* Expand the source area by a tiny bit so account of different rounding that - * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from - * 0.5 so this won't cause the area computed to be overly pessimistic. 
- */ - tx1 += x_off - 8 * pixman_fixed_e; - ty1 += y_off - 8 * pixman_fixed_e; - tx2 += x_off + width + 8 * pixman_fixed_e; - ty2 += y_off + height + 8 * pixman_fixed_e; - - if (tx1 < pixman_min_fixed_48_16 || tx1 > pixman_max_fixed_48_16 || - ty1 < pixman_min_fixed_48_16 || ty1 > pixman_max_fixed_48_16 || - tx2 < pixman_min_fixed_48_16 || tx2 > pixman_max_fixed_48_16 || - ty2 < pixman_min_fixed_48_16 || ty2 > pixman_max_fixed_48_16) - { - return FALSE; - } - else - { - extents->x1 = pixman_fixed_to_int (tx1); - extents->y1 = pixman_fixed_to_int (ty1); - extents->x2 = pixman_fixed_to_int (tx2) + 1; - extents->y2 = pixman_fixed_to_int (ty2) + 1; - - return TRUE; - } -} - #define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX)) +#define ABS(f) (((f) < 0)? (-(f)) : (f)) +#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16))) static pixman_bool_t -analyze_extent (pixman_image_t *image, int x, int y, - const pixman_box32_t *extents, uint32_t *flags) +analyze_extent (pixman_image_t *image, + const pixman_box32_t *extents, + uint32_t *flags) { pixman_transform_t *transform; - pixman_fixed_t *params; pixman_fixed_t x_off, y_off; pixman_fixed_t width, height; - pixman_box32_t ex; + pixman_fixed_t *params; + box_48_16_t transformed; + pixman_box32_t exp_extents; if (!image) return TRUE; @@ -600,10 +417,10 @@ analyze_extent (pixman_image_t *image, int x, int y, * check here that the expanded-by-one source * extents in destination space fits in 16 bits */ - if (!IS_16BIT (extents->x1 - x - 1) || - !IS_16BIT (extents->y1 - y - 1) || - !IS_16BIT (extents->x2 - x + 1) || - !IS_16BIT (extents->y2 - y + 1)) + if (!IS_16BIT (extents->x1 - 1) || + !IS_16BIT (extents->y1 - 1) || + !IS_16BIT (extents->x2 + 1) || + !IS_16BIT (extents->y2 + 1)) { return FALSE; } @@ -618,18 +435,16 @@ analyze_extent (pixman_image_t *image, int x, int y, if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff) return FALSE; -#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER) - - if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST && - extents->x1 - x >= 0 && - extents->y1 - y >= 0 && - extents->x2 - x <= image->bits.width && - extents->y2 - y <= image->bits.height) + if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM && + extents->x1 >= 0 && + extents->y1 >= 0 && + extents->x2 <= image->bits.width && + extents->y2 <= image->bits.height) { - *flags |= FAST_PATH_SAMPLES_COVER_CLIP; + *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; return TRUE; } - + switch (image->common.filter) { case PIXMAN_FILTER_CONVOLUTION: @@ -640,6 +455,14 @@ analyze_extent (pixman_image_t *image, int x, int y, height = params[1]; break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + params = image->common.filter_params; + x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); + y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); + width = params[0]; + height = params[1]; + break; + case PIXMAN_FILTER_GOOD: case PIXMAN_FILTER_BEST: case PIXMAN_FILTER_BILINEAR: @@ -660,17 +483,6 @@ analyze_extent (pixman_image_t *image, int x, int y, default: return FALSE; } - - /* Check whether the non-expanded, transformed extent is entirely within - * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is. 
- */
-    ex = *extents;
-    if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height) &&
-        ex.x1 >= 0 && ex.y1 >= 0 &&
-        ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
-    {
-        *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
-    }
     }
     else
     {
@@ -680,18 +492,58 @@ analyze_extent (pixman_image_t *image, int x, int y,
         height = 0;
     }
 
-    /* Check that the extents expanded by one don't overflow. This ensures that
-     * compositing functions can simply walk the source space using 16.16
-     * variables without worrying about overflow.
-     */
-    ex.x1 = extents->x1 - 1;
-    ex.y1 = extents->y1 - 1;
-    ex.x2 = extents->x2 + 1;
-    ex.y2 = extents->y2 + 1;
-
-    if (!compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
+    if (!compute_transformed_extents (transform, extents, &transformed))
         return FALSE;
 
+    /* Expand the source area by a tiny bit to account for different rounding
+     * that may happen during sampling.  Note that (8 * pixman_fixed_e) is very
+     * far from 0.5 so this won't cause the area computed to be overly
+     * pessimistic.
+     */
+    transformed.x1 -= 8 * pixman_fixed_e;
+    transformed.y1 -= 8 * pixman_fixed_e;
+    transformed.x2 += 8 * pixman_fixed_e;
+    transformed.y2 += 8 * pixman_fixed_e;
+
+    if (image->common.type == BITS)
+    {
+        if (pixman_fixed_to_int (transformed.x1) >= 0 &&
+            pixman_fixed_to_int (transformed.y1) >= 0 &&
+            pixman_fixed_to_int (transformed.x2) < image->bits.width &&
+            pixman_fixed_to_int (transformed.y2) < image->bits.height)
+        {
+            *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+        }
+
+        if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 &&
+            pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 &&
+            pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
+            pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
+        {
+            *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
+        }
+    }
+
+    /* Check we don't overflow when the destination extents are expanded by one.
+     * This ensures that compositing functions can simply walk the source space
+     * using 16.16 variables without worrying about overflow.
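+     * (Here "16.16" means pixman_fixed_t: a signed 32-bit value with 16
+     * fractional bits, so any coordinate walked this way must stay within
+     * roughly +/-32768.)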
+ */ + exp_extents = *extents; + exp_extents.x1 -= 1; + exp_extents.y1 -= 1; + exp_extents.x2 += 1; + exp_extents.y2 += 1; + + if (!compute_transformed_extents (transform, &exp_extents, &transformed)) + return FALSE; + + if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e) || + !IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) || + !IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) || + !IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height)) + { + return FALSE; + } + return TRUE; } @@ -729,18 +581,13 @@ pixman_image_composite32 (pixman_op_t op, int32_t height) { pixman_format_code_t src_format, mask_format, dest_format; - uint32_t src_flags, mask_flags, dest_flags; pixman_region32_t region; - pixman_box32_t *extents; - uint32_t *src_bits; - int src_dx, src_dy; - uint32_t *mask_bits; - int mask_dx, mask_dy; - uint32_t *dest_bits; - int dest_dx, dest_dy; - pixman_bool_t need_workaround; + pixman_box32_t extents; pixman_implementation_t *imp; pixman_composite_func_t func; + pixman_composite_info_t info; + const pixman_box32_t *pbox; + int n; _pixman_image_validate (src); if (mask) @@ -748,26 +595,27 @@ pixman_image_composite32 (pixman_op_t op, _pixman_image_validate (dest); src_format = src->common.extended_format_code; - src_flags = src->common.flags; + info.src_flags = src->common.flags; - if (mask) + if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) { mask_format = mask->common.extended_format_code; - mask_flags = mask->common.flags; + info.mask_flags = mask->common.flags; } else { mask_format = PIXMAN_null; - mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE; } dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; + info.dest_flags = dest->common.flags; /* Check for pixbufs */ if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && (src->type == BITS && src->bits.bits == mask->bits.bits) && (src->common.repeat == mask->common.repeat) && + (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && (src_x == mask_x && src_y == mask_y)) { if (src_format == PIXMAN_x8b8g8r8) @@ -776,89 +624,92 @@ pixman_image_composite32 (pixman_op_t op, src_format = mask_format = PIXMAN_rpixbuf; } - /* Check for workaround */ - need_workaround = (src_flags | mask_flags | dest_flags) & FAST_PATH_NEEDS_WORKAROUND; - - if (need_workaround) - { - apply_workaround (src, &src_x, &src_y, &src_bits, &src_dx, &src_dy); - apply_workaround (mask, &mask_x, &mask_y, &mask_bits, &mask_dx, &mask_dy); - apply_workaround (dest, &dest_x, &dest_y, &dest_bits, &dest_dx, &dest_dy); - } - pixman_region32_init (®ion); - if (!pixman_compute_composite_region32 ( + if (!_pixman_compute_composite_region32 ( ®ion, src, mask, dest, src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height)) { goto out; } - extents = pixman_region32_extents (®ion); + extents = *pixman_region32_extents (®ion); - if (!analyze_extent (src, dest_x - src_x, dest_y - src_y, extents, &src_flags)) + extents.x1 -= dest_x - src_x; + extents.y1 -= dest_y - src_y; + extents.x2 -= dest_x - src_x; + extents.y2 -= dest_y - src_y; + + if (!analyze_extent (src, &extents, &info.src_flags)) goto out; - if (!analyze_extent (mask, dest_x - mask_x, dest_y - mask_y, extents, &mask_flags)) + extents.x1 -= src_x - mask_x; + extents.y1 -= src_y - mask_y; + extents.x2 -= src_x - mask_x; + extents.y2 -= src_y - mask_y; + + if (!analyze_extent (mask, &extents, &info.mask_flags)) goto out; - /* If the clip is within the source samples, and the samples are opaque, - 
* then the source is effectively opaque. + /* If the clip is within the source samples, and the samples are + * opaque, then the source is effectively opaque. */ -#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP) +#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) +#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \ + FAST_PATH_BILINEAR_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) + + if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + { + info.src_flags |= FAST_PATH_IS_OPAQUE; + } + + if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + { + info.mask_flags |= FAST_PATH_IS_OPAQUE; + } - if ((src_flags & BOTH) == BOTH) - src_flags |= FAST_PATH_IS_OPAQUE; - - if ((mask_flags & BOTH) == BOTH) - mask_flags |= FAST_PATH_IS_OPAQUE; - /* * Check if we can replace our operator by a simpler one * if the src or dest are opaque. The output operator should be * mathematically equivalent to the source. */ - op = optimize_operator (op, src_flags, mask_flags, dest_flags); - if (op == PIXMAN_OP_DST) - goto out; + info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); - if (lookup_composite_function (op, - src_format, src_flags, - mask_format, mask_flags, - dest_format, dest_flags, - &imp, &func)) + _pixman_implementation_lookup_composite ( + get_implementation (), info.op, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, info.dest_flags, + &imp, &func); + + info.src_image = src; + info.mask_image = mask; + info.dest_image = dest; + + pbox = pixman_region32_rectangles (®ion, &n); + + while (n--) { - const pixman_box32_t *pbox; - int n; + info.src_x = pbox->x1 + src_x - dest_x; + info.src_y = pbox->y1 + src_y - dest_y; + info.mask_x = pbox->x1 + mask_x - dest_x; + info.mask_y = pbox->y1 + mask_y - dest_y; + info.dest_x = pbox->x1; + info.dest_y = pbox->y1; + info.width = pbox->x2 - pbox->x1; + info.height = pbox->y2 - pbox->y1; - pbox = pixman_region32_rectangles (®ion, &n); - - while (n--) - { - func (imp, op, - src, mask, dest, - pbox->x1 + src_x - dest_x, - pbox->y1 + src_y - dest_y, - pbox->x1 + mask_x - dest_x, - pbox->y1 + mask_y - dest_y, - pbox->x1, - pbox->y1, - pbox->x2 - pbox->x1, - pbox->y2 - pbox->y1); - - pbox++; - } + func (imp, &info); + + pbox++; } out: - if (need_workaround) - { - unapply_workaround (src, src_bits, src_dx, src_dy); - unapply_workaround (mask, mask_bits, mask_dx, mask_dy); - unapply_workaround (dest, dest_bits, dest_dx, dest_dy); - } - pixman_region32_fini (®ion); } @@ -889,8 +740,8 @@ pixman_blt (uint32_t *src_bits, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { @@ -898,7 +749,7 @@ pixman_blt (uint32_t *src_bits, src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, src_x, src_y, - dst_x, dst_y, + dest_x, dest_y, width, height); } @@ -910,10 +761,10 @@ pixman_fill (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { return _pixman_implementation_fill ( - get_implementation(), bits, stride, bpp, x, y, width, height, xor); + get_implementation(), bits, stride, bpp, x, y, width, height, filler); } static uint32_t @@ -927,9 +778,9 @@ color_to_uint32 (const pixman_color_t *color) } static pixman_bool_t -color_to_pixel (pixman_color_t * color, - uint32_t * pixel, - pixman_format_code_t 
@@ -927,9 +778,9 @@ color_to_uint32 (const pixman_color_t *color)
 }
 
 static pixman_bool_t
-color_to_pixel (pixman_color_t *     color,
-                uint32_t *           pixel,
-                pixman_format_code_t format)
+color_to_pixel (const pixman_color_t *color,
+                uint32_t *            pixel,
+                pixman_format_code_t  format)
 {
     uint32_t c = color_to_uint32 (color);
 
@@ -939,9 +790,12 @@ color_to_pixel (pixman_color_t * color,
           format == PIXMAN_x8b8g8r8 ||
           format == PIXMAN_b8g8r8a8 ||
           format == PIXMAN_b8g8r8x8 ||
+          format == PIXMAN_r8g8b8a8 ||
+          format == PIXMAN_r8g8b8x8 ||
           format == PIXMAN_r5g6b5   ||
           format == PIXMAN_b5g6r5   ||
-          format == PIXMAN_a8))
+          format == PIXMAN_a8       ||
+          format == PIXMAN_a1))
     {
         return FALSE;
     }
@@ -960,12 +814,16 @@ color_to_pixel (pixman_color_t * color,
              ((c & 0x0000ff00) << 8) |
              ((c & 0x000000ff) << 24);
     }
+    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
+        c = ((c & 0xff000000) >> 24) | (c << 8);
 
-    if (format == PIXMAN_a8)
+    if (format == PIXMAN_a1)
+        c = c >> 31;
+    else if (format == PIXMAN_a8)
         c = c >> 24;
     else if (format == PIXMAN_r5g6b5 ||
              format == PIXMAN_b5g6r5)
-        c = CONVERT_8888_TO_0565 (c);
+        c = convert_8888_to_0565 (c);
 
 #if 0
     printf ("color: %x %x %x %x\n",
             color->alpha, color->red, color->green, color->blue);
@@ -979,7 +837,7 @@ color_to_pixel (pixman_color_t * color,
 PIXMAN_EXPORT pixman_bool_t
 pixman_image_fill_rectangles (pixman_op_t                 op,
                               pixman_image_t *            dest,
-                              pixman_color_t *            color,
+                              const pixman_color_t *      color,
                               int                         n_rects,
                               const pixman_rectangle16_t *rects)
 {
@@ -1018,7 +876,7 @@ pixman_image_fill_rectangles (pixman_op_t op,
 PIXMAN_EXPORT pixman_bool_t
 pixman_image_fill_boxes (pixman_op_t           op,
                          pixman_image_t *      dest,
-                         pixman_color_t *      color,
+                         const pixman_color_t *color,
                          int                   n_boxes,
                          const pixman_box32_t *boxes)
 {
@@ -1163,11 +1021,14 @@ pixman_format_supported_source (pixman_format_code_t format)
     case PIXMAN_a2r10g10b10:
     case PIXMAN_x2r10g10b10:
     case PIXMAN_a8r8g8b8:
+    case PIXMAN_a8r8g8b8_sRGB:
     case PIXMAN_x8r8g8b8:
     case PIXMAN_a8b8g8r8:
     case PIXMAN_x8b8g8r8:
     case PIXMAN_b8g8r8a8:
     case PIXMAN_b8g8r8x8:
+    case PIXMAN_r8g8b8a8:
+    case PIXMAN_r8g8b8x8:
     case PIXMAN_r8g8b8:
     case PIXMAN_b8g8r8:
     case PIXMAN_r5g6b5:
@@ -1243,7 +1104,7 @@ PIXMAN_EXPORT pixman_bool_t
 pixman_compute_composite_region (pixman_region16_t * region,
                                  pixman_image_t *    src_image,
                                  pixman_image_t *    mask_image,
-                                 pixman_image_t *    dst_image,
+                                 pixman_image_t *    dest_image,
                                  int16_t             src_x,
                                  int16_t             src_y,
                                  int16_t             mask_x,
@@ -1258,8 +1119,8 @@ pixman_compute_composite_region (pixman_region16_t * region,
 
     pixman_region32_init (&r32);
 
-    retval = pixman_compute_composite_region32 (
-        &r32, src_image, mask_image, dst_image,
+    retval = _pixman_compute_composite_region32 (
+        &r32, src_image, mask_image, dest_image,
         src_x, src_y, mask_x, mask_y, dest_x, dest_y,
         width, height);
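The PIXMAN_TYPE_RGBA branch added to color_to_pixel above rotates alpha from the top byte to the bottom one. A standalone sketch of that rotation with a worked value:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
        uint32_t c = 0x80ff4020; /* a8r8g8b8: a=0x80 r=0xff g=0x40 b=0x20 */

        /* Same rotation as color_to_pixel's PIXMAN_TYPE_RGBA branch. */
        c = ((c & 0xff000000) >> 24) | (c << 8);

        printf ("%08x\n", c);    /* prints ff402080: r, g, b, a */
        return 0;
    }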
diff --git a/programs/develop/libraries/pixman/pixman.h b/programs/develop/libraries/pixman/pixman.h
index cfffa79add..7ff9fb52a1 100644
--- a/programs/develop/libraries/pixman/pixman.h
+++ b/programs/develop/libraries/pixman/pixman.h
@@ -226,6 +226,9 @@ pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *
 /*
  * Floating point matrices
  */
+typedef struct pixman_f_transform pixman_f_transform_t;
+typedef struct pixman_f_vector pixman_f_vector_t;
+
 struct pixman_f_vector
 {
     double v[3];
@@ -289,7 +292,28 @@ typedef enum
     PIXMAN_FILTER_BEST,
     PIXMAN_FILTER_NEAREST,
     PIXMAN_FILTER_BILINEAR,
-    PIXMAN_FILTER_CONVOLUTION
+    PIXMAN_FILTER_CONVOLUTION,
+
+    /* The SEPARABLE_CONVOLUTION filter takes the following parameters:
+     *
+     *     width:        integer given as 16.16 fixpoint number
+     *     height:       integer given as 16.16 fixpoint number
+     *     x_phase_bits: integer given as 16.16 fixpoint
+     *     y_phase_bits: integer given as 16.16 fixpoint
+     *     xtables:      (1 << x_phase_bits) tables of size width
+     *     ytables:      (1 << y_phase_bits) tables of size height
+     *
+     * When sampling at (x, y), the location is first rounded to one of
+     * n_x_phases * n_y_phases subpixel positions. These subpixel positions
+     * determine an xtable and a ytable to use.
+     *
+     * Conceptually a width x height matrix is then formed in which each entry
+     * is the product of the corresponding entries in the x and y tables.
+     * This matrix is then aligned with the image pixels such that its center
+     * is as close as possible to the subpixel location chosen earlier. Then
+     * the image is convolved with the matrix and the resulting pixel returned.
+     */
+    PIXMAN_FILTER_SEPARABLE_CONVOLUTION
 } pixman_filter_t;
 
 typedef enum
@@ -466,6 +490,7 @@ pixman_bool_t pixman_region_equal (pixman_region16_t *reg
 pixman_bool_t           pixman_region_selfcheck   (pixman_region16_t *region);
 void                    pixman_region_reset       (pixman_region16_t *region,
                                                    pixman_box16_t    *box);
+void                    pixman_region_clear       (pixman_region16_t *region);
 /*
  * 32 bit regions
  */
@@ -560,6 +585,7 @@ pixman_bool_t pixman_region32_equal (pixman_region32_t *r
 pixman_bool_t           pixman_region32_selfcheck (pixman_region32_t *region);
 void                    pixman_region32_reset     (pixman_region32_t *region,
                                                    pixman_box32_t    *box);
+void                    pixman_region32_clear     (pixman_region32_t *region);
 
 /* Copy / Fill / Misc */
@@ -571,8 +597,8 @@ pixman_bool_t pixman_blt (uint32_t *src_bits,
                           int       dst_bpp,
                           int       src_x,
                           int       src_y,
-                          int       dst_x,
-                          int       dst_y,
+                          int       dest_x,
+                          int       dest_y,
                           int       width,
                           int       height);
 pixman_bool_t pixman_fill (uint32_t *bits,
@@ -650,11 +676,14 @@ struct pixman_indexed
 #define PIXMAN_TYPE_YUY2      6
 #define PIXMAN_TYPE_YV12      7
 #define PIXMAN_TYPE_BGRA      8
+#define PIXMAN_TYPE_RGBA      9
+#define PIXMAN_TYPE_ARGB_SRGB 10
 
 #define PIXMAN_FORMAT_COLOR(f)                          \
        (PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB ||    \
         PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR ||    \
-        PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA)
+        PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA ||    \
+        PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
 
 /* 32bpp formats */
 typedef enum {
@@ -664,12 +693,17 @@ typedef enum {
     PIXMAN_x8b8g8r8    = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
     PIXMAN_b8g8r8a8    = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
     PIXMAN_b8g8r8x8    = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
+    PIXMAN_r8g8b8a8    = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
+    PIXMAN_r8g8b8x8    = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
     PIXMAN_x14r6g6b6   = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
     PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
     PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
     PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
     PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
+/* sRGB formats */
+    PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
+
 /* 24bpp formats */
     PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
     PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
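Each format code packs bpp, type, and channel widths into one integer, so the new PIXMAN_r8g8b8a8 entry can be decoded with the existing accessor macros; a sketch:

    #include <pixman.h>
    #include <stdio.h>

    int
    main (void)
    {
        pixman_format_code_t f = PIXMAN_r8g8b8a8;

        printf ("bpp=%d type=%d a=%d r=%d g=%d b=%d color=%d\n",
                PIXMAN_FORMAT_BPP (f), PIXMAN_FORMAT_TYPE (f),
                PIXMAN_FORMAT_A (f), PIXMAN_FORMAT_R (f),
                PIXMAN_FORMAT_G (f), PIXMAN_FORMAT_B (f),
                PIXMAN_FORMAT_COLOR (f) ? 1 : 0); /* RGBA now counts as color */
        return 0;
    }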
@@ -727,18 +761,18 @@ pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
 pixman_bool_t pixman_format_supported_source      (pixman_format_code_t format);
 
 /* Constructors */
-pixman_image_t *pixman_image_create_solid_fill       (pixman_color_t               *color);
-pixman_image_t *pixman_image_create_linear_gradient  (pixman_point_fixed_t         *p1,
-                                                      pixman_point_fixed_t         *p2,
+pixman_image_t *pixman_image_create_solid_fill       (const pixman_color_t         *color);
+pixman_image_t *pixman_image_create_linear_gradient  (const pixman_point_fixed_t   *p1,
+                                                      const pixman_point_fixed_t   *p2,
                                                       const pixman_gradient_stop_t *stops,
                                                       int                           n_stops);
-pixman_image_t *pixman_image_create_radial_gradient  (pixman_point_fixed_t         *inner,
-                                                      pixman_point_fixed_t         *outer,
+pixman_image_t *pixman_image_create_radial_gradient  (const pixman_point_fixed_t   *inner,
+                                                      const pixman_point_fixed_t   *outer,
                                                       pixman_fixed_t                inner_radius,
                                                       pixman_fixed_t                outer_radius,
                                                       const pixman_gradient_stop_t *stops,
                                                       int                           n_stops);
-pixman_image_t *pixman_image_create_conical_gradient (pixman_point_fixed_t         *center,
+pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t   *center,
                                                       pixman_fixed_t                angle,
                                                       const pixman_gradient_stop_t *stops,
                                                       int                           n_stops);
@@ -747,6 +781,11 @@ pixman_image_t *pixman_image_create_bits (pixman_format_code_t
                                           int       height,
                                           uint32_t *bits,
                                           int       rowstride_bytes);
+pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format,
+                                                   int                  width,
+                                                   int                  height,
+                                                   uint32_t *           bits,
+                                                   int                  rowstride_bytes);
 
 /* Destructor */
 pixman_image_t *pixman_image_ref   (pixman_image_t *image);
@@ -792,14 +831,41 @@ int pixman_image_get_height (pixman_image_t
 int                  pixman_image_get_stride (pixman_image_t *image); /* in bytes */
 int                  pixman_image_get_depth  (pixman_image_t *image);
 pixman_format_code_t pixman_image_get_format (pixman_image_t *image);
+
+typedef enum
+{
+    PIXMAN_KERNEL_IMPULSE,
+    PIXMAN_KERNEL_BOX,
+    PIXMAN_KERNEL_LINEAR,
+    PIXMAN_KERNEL_CUBIC,
+    PIXMAN_KERNEL_GAUSSIAN,
+    PIXMAN_KERNEL_LANCZOS2,
+    PIXMAN_KERNEL_LANCZOS3,
+    PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */
+} pixman_kernel_t;
+
+/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
+ * with the given kernels and scale parameters.
+ */
+pixman_fixed_t *
+pixman_filter_create_separable_convolution (int             *n_values,
+                                            pixman_fixed_t   scale_x,
+                                            pixman_fixed_t   scale_y,
+                                            pixman_kernel_t  reconstruct_x,
+                                            pixman_kernel_t  reconstruct_y,
+                                            pixman_kernel_t  sample_x,
+                                            pixman_kernel_t  sample_y,
+                                            int              subsample_bits_x,
+                                            int              subsample_bits_y);
+
 pixman_bool_t pixman_image_fill_rectangles (pixman_op_t                 op,
                                             pixman_image_t             *image,
-                                            pixman_color_t             *color,
+                                            const pixman_color_t       *color,
                                             int                         n_rects,
                                             const pixman_rectangle16_t *rects);
 pixman_bool_t pixman_image_fill_boxes      (pixman_op_t            op,
                                             pixman_image_t        *dest,
-                                            pixman_color_t        *color,
+                                            const pixman_color_t  *color,
                                             int                    n_boxes,
                                             const pixman_box32_t  *boxes);
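A caller-side sketch of how the kernel enum and the constructor above fit together: build a parameter list for a 2x downscale and attach it with pixman_image_set_filter, which copies the values (the kernel choices below are illustrative, not a recommendation):

    #include <pixman.h>
    #include <stdlib.h>

    /* Attach a SEPARABLE_CONVOLUTION filter sized for a 2x downscale. */
    static pixman_bool_t
    set_downscale_filter (pixman_image_t *src)
    {
        int n_values;
        pixman_fixed_t *params = pixman_filter_create_separable_convolution (
            &n_values,
            pixman_double_to_fixed (2.0),  /* scale_x: source/dest ratio */
            pixman_double_to_fixed (2.0),  /* scale_y */
            PIXMAN_KERNEL_LINEAR,          /* reconstruct_x */
            PIXMAN_KERNEL_LINEAR,          /* reconstruct_y */
            PIXMAN_KERNEL_BOX,             /* sample_x */
            PIXMAN_KERNEL_BOX,             /* sample_y */
            4, 4);                         /* subsample_bits: 16 phases each */

        if (!params)
            return FALSE;

        pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
                                 params, n_values);
        free (params);
        return TRUE;
    }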
@@ -807,7 +873,7 @@ pixman_bool_t pixman_image_fill_boxes (pixman_op_t
 pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
                                                pixman_image_t    *src_image,
                                                pixman_image_t    *mask_image,
-                                               pixman_image_t    *dst_image,
+                                               pixman_image_t    *dest_image,
                                                int16_t            src_x,
                                                int16_t            src_y,
                                                int16_t            mask_x,
@@ -841,19 +907,84 @@ void pixman_image_composite32 (pixman_op_t op,
                                int32_t width,
                                int32_t height);
 
-/* Old X servers rely on out-of-bounds accesses when they are asked
- * to composite with a window as the source. They create a pixman image
- * pointing to some bogus position in memory, but then they set a clip
- * region to the position where the actual bits are.
+/* Executive Summary: This function is a no-op that only exists
+ * for historical reasons.
+ *
+ * There used to be a bug in the X server where it would rely on
+ * out-of-bounds accesses when it was asked to composite with a
+ * window as the source. It would create a pixman image pointing
+ * to some bogus position in memory, but then set a clip region
+ * to the position where the actual bits were.
  *
  * Due to a bug in old versions of pixman, where it would not clip
  * against the image bounds when a clip region was set, this would
- * actually work. So by default we allow certain out-of-bound access
- * to happen unless explicitly disabled.
+ * actually work. So when the pixman bug was fixed, a workaround was
+ * added to allow certain out-of-bound accesses. This function disabled
+ * those workarounds.
  *
- * Fixed X servers should call this function to disable the workaround.
+ * Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
+ * function is a no-op.
  */
-void pixman_disable_out_of_bounds_workaround (void);
+void pixman_disable_out_of_bounds_workaround (void);
+
+/*
+ * Glyphs
+ */
+typedef struct pixman_glyph_cache_t pixman_glyph_cache_t;
+typedef struct
+{
+    int         x, y;
+    const void *glyph;
+} pixman_glyph_t;
+
+pixman_glyph_cache_t *pixman_glyph_cache_create    (void);
+void                  pixman_glyph_cache_destroy   (pixman_glyph_cache_t *cache);
+void                  pixman_glyph_cache_freeze    (pixman_glyph_cache_t *cache);
+void                  pixman_glyph_cache_thaw      (pixman_glyph_cache_t *cache);
+const void *          pixman_glyph_cache_lookup    (pixman_glyph_cache_t *cache,
+                                                    void                 *font_key,
+                                                    void                 *glyph_key);
+const void *          pixman_glyph_cache_insert    (pixman_glyph_cache_t *cache,
+                                                    void                 *font_key,
+                                                    void                 *glyph_key,
+                                                    int                   origin_x,
+                                                    int                   origin_y,
+                                                    pixman_image_t       *glyph_image);
+void                  pixman_glyph_cache_remove    (pixman_glyph_cache_t *cache,
+                                                    void                 *font_key,
+                                                    void                 *glyph_key);
+void                  pixman_glyph_get_extents     (pixman_glyph_cache_t *cache,
+                                                    int                   n_glyphs,
+                                                    pixman_glyph_t       *glyphs,
+                                                    pixman_box32_t       *extents);
+pixman_format_code_t  pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
+                                                    int                   n_glyphs,
+                                                    const pixman_glyph_t *glyphs);
+void                  pixman_composite_glyphs         (pixman_op_t           op,
+                                                       pixman_image_t       *src,
+                                                       pixman_image_t       *dest,
+                                                       pixman_format_code_t  mask_format,
+                                                       int32_t               src_x,
+                                                       int32_t               src_y,
+                                                       int32_t               mask_x,
+                                                       int32_t               mask_y,
+                                                       int32_t               dest_x,
+                                                       int32_t               dest_y,
+                                                       int32_t               width,
+                                                       int32_t               height,
+                                                       pixman_glyph_cache_t *cache,
+                                                       int                   n_glyphs,
+                                                       const pixman_glyph_t *glyphs);
+void                  pixman_composite_glyphs_no_mask (pixman_op_t           op,
+                                                       pixman_image_t       *src,
+                                                       pixman_image_t       *dest,
+                                                       int32_t               src_x,
+                                                       int32_t               src_y,
+                                                       int32_t               dest_x,
+                                                       int32_t               dest_y,
+                                                       pixman_glyph_cache_t *cache,
+                                                       int                   n_glyphs,
+                                                       const pixman_glyph_t *glyphs);
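The declarations above imply a freeze/insert/composite/thaw discipline: glyphs may only be inserted, and must stay valid, while the cache is frozen. A sketch of one plausible flow (font_key and glyph_key are caller-chosen stable pointers; error handling is abbreviated):

    #include <pixman.h>

    static void
    draw_one_glyph (pixman_image_t *src, pixman_image_t *dest,
                    void *font_key, void *glyph_key,
                    pixman_image_t *glyph_image)
    {
        pixman_glyph_cache_t *cache = pixman_glyph_cache_create ();
        pixman_glyph_t g = { 10, 20, NULL };  /* place glyph at (10, 20) */

        pixman_glyph_cache_freeze (cache);

        g.glyph = pixman_glyph_cache_lookup (cache, font_key, glyph_key);
        if (!g.glyph)
            g.glyph = pixman_glyph_cache_insert (cache, font_key, glyph_key,
                                                 0, 0, glyph_image);

        if (g.glyph) /* insert returns NULL on allocation failure */
        {
            pixman_composite_glyphs_no_mask (PIXMAN_OP_OVER, src, dest,
                                             0, 0,  /* src_x, src_y */
                                             0, 0,  /* dest_x, dest_y */
                                             cache, 1, &g);
        }

        pixman_glyph_cache_thaw (cache);
        pixman_glyph_cache_destroy (cache);
    }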
 
 /*
  * Trapezoids
  */
@@ -862,6 +993,7 @@ typedef struct pixman_edge pixman_edge_t;
 typedef struct pixman_trapezoid pixman_trapezoid_t;
 typedef struct pixman_trap pixman_trap_t;
 typedef struct pixman_span_fix pixman_span_fix_t;
+typedef struct pixman_triangle pixman_triangle_t;
 
 /*
  * An edge structure.  This represents a single polygon edge
@@ -889,6 +1021,10 @@ struct pixman_trapezoid
     pixman_line_fixed_t left, right;
 };
 
+struct pixman_triangle
+{
+    pixman_point_fixed_t p1, p2, p3;
+};
+
 /* whether 't' is a well defined not obviously empty trapezoid */
 #define pixman_trapezoid_valid(t)                          \
@@ -934,7 +1070,7 @@ void pixman_add_traps (pixman_image_t *image,
                        int16_t              x_off,
                        int16_t              y_off,
                        int                  ntrap,
-                       pixman_trap_t       *traps);
+                       const pixman_trap_t *traps);
 void pixman_add_trapezoids       (pixman_image_t           *image,
                                   int16_t                   x_off,
                                   int                       y_off,
@@ -944,6 +1080,31 @@ void pixman_rasterize_trapezoid (pixman_image_t *image,
                                  const pixman_trapezoid_t *trap,
                                  int                       x_off,
                                  int                       y_off);
+void pixman_composite_trapezoids (pixman_op_t               op,
+                                  pixman_image_t           *src,
+                                  pixman_image_t           *dst,
+                                  pixman_format_code_t      mask_format,
+                                  int                       x_src,
+                                  int                       y_src,
+                                  int                       x_dst,
+                                  int                       y_dst,
+                                  int                       n_traps,
+                                  const pixman_trapezoid_t *traps);
+void pixman_composite_triangles  (pixman_op_t               op,
+                                  pixman_image_t           *src,
+                                  pixman_image_t           *dst,
+                                  pixman_format_code_t      mask_format,
+                                  int                       x_src,
+                                  int                       y_src,
+                                  int                       x_dst,
+                                  int                       y_dst,
+                                  int                       n_tris,
+                                  const pixman_triangle_t  *tris);
+void pixman_add_triangles        (pixman_image_t           *image,
+                                  int32_t                   x_off,
+                                  int32_t                   y_off,
+                                  int                       n_tris,
+                                  const pixman_triangle_t  *tris);
 
 PIXMAN_END_DECLS
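Finally, a sketch of the new triangle entry point: composite a solid source through a single-triangle a8 mask onto a destination (coordinates are 16.16 fixed-point; the geometry is illustrative):

    #include <pixman.h>

    static void
    draw_triangle (pixman_image_t *dest)
    {
        pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff };
        pixman_image_t *src = pixman_image_create_solid_fill (&white);

        pixman_triangle_t tri = {
            { pixman_int_to_fixed (10), pixman_int_to_fixed (10) }, /* p1 */
            { pixman_int_to_fixed (90), pixman_int_to_fixed (30) }, /* p2 */
            { pixman_int_to_fixed (30), pixman_int_to_fixed (90) }, /* p3 */
        };

        pixman_composite_triangles (PIXMAN_OP_OVER, src, dest, PIXMAN_a8,
                                    0, 0,  /* x_src, y_src */
                                    0, 0,  /* x_dst, y_dst */
                                    1, &tri);

        pixman_image_unref (src);
    }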