mirror of
https://github.com/Relintai/sdl2_frt.git
synced 2024-12-20 22:16:49 +01:00
ARM: SIMD optimization for 4:4:4:4 to 8:8:8:8 normal blits
This commit is contained in:
parent
becc649ae2
commit
74846657ec
@ -950,6 +950,21 @@ Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo * info)
|
|||||||
|
|
||||||
Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Blit_RGB444_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint16_t *src, int32_t src_stride);
|
||||||
|
|
||||||
|
static void
|
||||||
|
Blit_RGB444_RGB888ARMSIMD(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int32_t width = info->dst_w;
|
||||||
|
int32_t height = info->dst_h;
|
||||||
|
uint32_t *dstp = (uint32_t *)info->dst;
|
||||||
|
int32_t dststride = width + (info->dst_skip >> 2);
|
||||||
|
uint16_t *srcp = (uint16_t *)info->src;
|
||||||
|
int32_t srcstride = width + (info->src_skip >> 1);
|
||||||
|
|
||||||
|
Blit_RGB444_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This is now endian dependent */
|
/* This is now endian dependent */
|
||||||
@ -3226,6 +3241,10 @@ static const struct blit_table normal_blit_2[] = {
|
|||||||
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
{0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
|
{0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
|
||||||
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
#endif
|
||||||
|
#if SDL_ARM_SIMD_BLITTERS
|
||||||
|
{0x00000F00, 0x000000F0, 0x0000000F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
||||||
|
BLIT_FEATURE_HAS_ARM_SIMD, Blit_RGB444_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA},
|
||||||
#endif
|
#endif
|
||||||
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
||||||
0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
@ -473,3 +473,60 @@ generate_composite_function \
|
|||||||
nop_macro, /* cleanup */ \
|
nop_macro, /* cleanup */ \
|
||||||
BGR888toRGB888_process_head, \
|
BGR888toRGB888_process_head, \
|
||||||
BGR888toRGB888_process_tail
|
BGR888toRGB888_process_tail
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
.macro RGB444toRGB888_init
|
||||||
|
ldr MASK, =0x0f0f0f0f
|
||||||
|
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
|
||||||
|
msr CPSR_s, #0x50000
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RGB444toRGB888_1pixel reg, mask, tmp
|
||||||
|
pkhbt WK®, WK®, WK®, lsl #12 @ 0000aaaarrrrggggaaaarrrrggggbbbb
|
||||||
|
and WK®, mask, WK® @ 0000aaaa0000gggg0000rrrr0000bbbb
|
||||||
|
orr WK®, WK®, WK®, lsl #4 @ aaaaaaaaggggggggrrrrrrrrbbbbbbbb
|
||||||
|
pkhtb tmp, WK®, WK®, asr #8 @ aaaaaaaaggggggggggggggggrrrrrrrr
|
||||||
|
pkhbt WK®, WK®, WK®, lsl #8 @ ggggggggrrrrrrrrrrrrrrrrbbbbbbbb
|
||||||
|
sel WK®, WK®, tmp @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RGB444toRGB888_2pixels in, out1, out2, mask, tmp1, tmp2
|
||||||
|
and tmp1, mask, WK&in @ 0000RRRR0000BBBB0000rrrr0000bbbb
|
||||||
|
and tmp2, mask, WK&in, lsr #4 @ 0000AAAA0000GGGG0000aaaa0000gggg
|
||||||
|
orr tmp1, tmp1, tmp1, lsl #4 @ RRRRRRRRBBBBBBBBrrrrrrrrbbbbbbbb
|
||||||
|
orr tmp2, tmp2, tmp2, lsl #4 @ AAAAAAAAGGGGGGGGaaaaaaaagggggggg
|
||||||
|
pkhtb WK&out2, tmp2, tmp1, asr #16 @ AAAAAAAAGGGGGGGGRRRRRRRRBBBBBBBB
|
||||||
|
pkhbt WK&out1, tmp1, tmp2, lsl #16 @ aaaaaaaaggggggggrrrrrrrrbbbbbbbb
|
||||||
|
pkhtb tmp2, WK&out2, WK&out2, asr #8 @ AAAAAAAAGGGGGGGGGGGGGGGGRRRRRRRR
|
||||||
|
pkhtb tmp1, WK&out1, WK&out1, asr #8 @ aaaaaaaaggggggggggggggggrrrrrrrr
|
||||||
|
pkhbt WK&out1, WK&out1, WK&out1, lsl #8 @ ggggggggrrrrrrrrrrrrrrrrbbbbbbbb
|
||||||
|
pkhbt WK&out2, WK&out2, WK&out2, lsl #8 @ GGGGGGGGRRRRRRRRRRRRRRRRBBBBBBBB
|
||||||
|
sel WK&out1, WK&out1, tmp1 @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
|
||||||
|
sel WK&out2, WK&out2, tmp2 @ AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RGB444toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
||||||
|
pixld cond, numbytes/2, firstreg, SRC, unaligned_src
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RGB444toRGB888_process_tail cond, numbytes, firstreg
|
||||||
|
.if numbytes >= 8
|
||||||
|
.if numbytes == 16
|
||||||
|
RGB444toRGB888_2pixels %(firstreg+1), %(firstreg+2), %(firstreg+3), MASK, STRIDE_M, SCRATCH
|
||||||
|
.endif
|
||||||
|
RGB444toRGB888_2pixels %(firstreg+0), %(firstreg+0), %(firstreg+1), MASK, STRIDE_M, SCRATCH
|
||||||
|
.else @ numbytes == 4
|
||||||
|
RGB444toRGB888_1pixel %(firstreg+0), MASK, SCRATCH
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
generate_composite_function \
|
||||||
|
Blit_RGB444_RGB888ARMSIMDAsm, 16, 0, 32, \
|
||||||
|
FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
|
||||||
|
2, /* prefetch distance */ \
|
||||||
|
RGB444toRGB888_init, \
|
||||||
|
nop_macro, /* newline */ \
|
||||||
|
nop_macro, /* cleanup */ \
|
||||||
|
RGB444toRGB888_process_head, \
|
||||||
|
RGB444toRGB888_process_tail
|
||||||
|
Loading…
Reference in New Issue
Block a user