From 6e35e4214559c8db22e3f45734daa3dada42da1a Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Mon, 1 Oct 2018 14:43:03 -0700 Subject: [PATCH] Working on bug 3921 - Add some Fastpath to BlitNtoNKey and BlitNtoNKeyCopyAlpha Sylvain I ran various benchmarks with clang 6.0.0 on Linux, and ndk-r16b on Android (NDK_TOOLCHAIN_VERSION=clang). - I still see a 10x speed factor. - With duff_loops, it does not use vectorisation (but that doesn't seem to be a problem). On Linux my patch is already at full speed at -O2, whereas the duff_loops need -O3 (200 ms at -O3, and 300 ms at -O2). I realized that on Android, I had a slight variation which fits best, both on Linux with -O2 and -O3, and on Android with -O2/-O3 and armeabi-v7a/arm64. Here's the patch. --- src/video/SDL_blit_N.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index de495163a..d6ec417fe 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2344,8 +2344,9 @@ BlitNtoNKey(SDL_BlitInfo * info) /* *INDENT-OFF* */ DUFFS_LOOP( { - Uint32 Pixel = (*src32 == ckey) ? *dst32 : *src32; - *dst32 = Pixel; + if (*src32 != ckey) { + *dst32 = *src32; + } ++src32; ++dst32; }, @@ -2418,8 +2419,9 @@ BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info) /* *INDENT-OFF* */ DUFFS_LOOP( { - Uint32 Pixel_ = ((*src32 & rgbmask) == ckey) ? *dst32 : *src32; - *dst32 = Pixel_; + if ((*src32 & rgbmask) != ckey) { + *dst32 = *src32; + } ++src32; ++dst32; },