mirror of
https://github.com/Relintai/sdl2_frt.git
synced 2024-12-29 20:27:12 +01:00
metal: SDL_RenderFillRects now issues one indexed draw call per batch of up to 16k rectangles (within a given FillRects call), instead of one draw call per rectangle. This reduces CPU usage when drawing many rectangles.
This commit is contained in:
parent
5f98051457
commit
4a58722b9f
@ -117,6 +117,7 @@ typedef struct METAL_ShaderPipelines
|
|||||||
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
|
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
|
||||||
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
|
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
|
||||||
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
|
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
|
||||||
|
@property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
|
||||||
@property (nonatomic, retain) CAMetalLayer *mtllayer;
|
@property (nonatomic, retain) CAMetalLayer *mtllayer;
|
||||||
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
|
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
|
||||||
@property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
|
@property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
|
||||||
@ -137,6 +138,7 @@ typedef struct METAL_ShaderPipelines
|
|||||||
[_mtlsamplernearest release];
|
[_mtlsamplernearest release];
|
||||||
[_mtlsamplerlinear release];
|
[_mtlsamplerlinear release];
|
||||||
[_mtlbufconstants release];
|
[_mtlbufconstants release];
|
||||||
|
[_mtlbufquadindices release];
|
||||||
[_mtllayer release];
|
[_mtllayer release];
|
||||||
[_mtlpassdesc release];
|
[_mtlpassdesc release];
|
||||||
[super dealloc];
|
[super dealloc];
|
||||||
@ -794,7 +796,6 @@ METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL
|
|||||||
static int
|
static int
|
||||||
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
|
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
|
||||||
{
|
{
|
||||||
// !!! FIXME: use an index buffer
|
|
||||||
const size_t vertlen = (sizeof (float) * 8) * count;
|
const size_t vertlen = (sizeof (float) * 8) * count;
|
||||||
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
||||||
if (!verts) {
|
if (!verts) {
|
||||||
@ -803,6 +804,11 @@ METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_
|
|||||||
|
|
||||||
cmd->data.draw.count = count;
|
cmd->data.draw.count = count;
|
||||||
|
|
||||||
|
/* Quads in the following vertex order (matches the quad index buffer):
|
||||||
|
* 1---3
|
||||||
|
* | \ |
|
||||||
|
* 0---2
|
||||||
|
*/
|
||||||
for (int i = 0; i < count; i++, rects++) {
|
for (int i = 0; i < count; i++, rects++) {
|
||||||
if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
|
if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
|
||||||
cmd->data.draw.count--;
|
cmd->data.draw.count--;
|
||||||
@ -829,9 +835,8 @@ static int
|
|||||||
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
|
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
|
||||||
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
|
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
|
||||||
{
|
{
|
||||||
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
|
const float texw = (float) texture->w;
|
||||||
const float texw = (float) texturedata.mtltexture.width;
|
const float texh = (float) texture->h;
|
||||||
const float texh = (float) texturedata.mtltexture.height;
|
|
||||||
// !!! FIXME: use an index buffer
|
// !!! FIXME: use an index buffer
|
||||||
const size_t vertlen = (sizeof (float) * 16);
|
const size_t vertlen = (sizeof (float) * 16);
|
||||||
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
||||||
@ -867,9 +872,8 @@ METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture *
|
|||||||
const SDL_Rect * srcquad, const SDL_FRect * dstrect,
|
const SDL_Rect * srcquad, const SDL_FRect * dstrect,
|
||||||
const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
|
const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
|
||||||
{
|
{
|
||||||
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
|
const float texw = (float) texture->w;
|
||||||
const float texw = (float) texturedata.mtltexture.width;
|
const float texh = (float) texture->h;
|
||||||
const float texh = (float) texturedata.mtltexture.height;
|
|
||||||
const float rads = (float)(M_PI * (float) angle / 180.0f);
|
const float rads = (float)(M_PI * (float) angle / 180.0f);
|
||||||
const float c = cosf(rads), s = sinf(rads);
|
const float c = cosf(rads), s = sinf(rads);
|
||||||
float minu, maxu, minv, maxv;
|
float minu, maxu, minv, maxv;
|
||||||
@ -1159,10 +1163,19 @@ METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *ver
|
|||||||
|
|
||||||
case SDL_RENDERCMD_FILL_RECTS: {
|
case SDL_RENDERCMD_FILL_RECTS: {
|
||||||
const size_t count = cmd->data.draw.count;
|
const size_t count = cmd->data.draw.count;
|
||||||
size_t start = 0;
|
const size_t maxcount = UINT16_MAX / 6;
|
||||||
SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
|
SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
|
||||||
for (size_t i = 0; i < count; i++, start += 4) { // !!! FIXME: can we do all of these this with a single draw call, using MTLPrimitiveTypeTriangle and an index buffer?
|
/* Our index buffer has 16 bit indices, so we can only draw 65k
|
||||||
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:start vertexCount:4];
|
* vertices (16k rects) at a time. */
|
||||||
|
for (size_t i = 0; i < count; i += maxcount) {
|
||||||
|
/* Set the vertex buffer offset for our current positions.
|
||||||
|
* The vertex buffer itself was bound in SetDrawState. */
|
||||||
|
[data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
|
||||||
|
[data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
|
||||||
|
indexCount:SDL_min(maxcount, count - i) * 6
|
||||||
|
indexType:MTLIndexTypeUInt16
|
||||||
|
indexBuffer:data.mtlbufquadindices
|
||||||
|
indexBufferOffset:0];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1424,11 +1437,6 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||||||
#if !__has_feature(objc_arc)
|
#if !__has_feature(objc_arc)
|
||||||
[mtlbufconstantstaging autorelease];
|
[mtlbufconstantstaging autorelease];
|
||||||
#endif
|
#endif
|
||||||
mtlbufconstantstaging.label = @"SDL constant staging data";
|
|
||||||
|
|
||||||
id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
|
|
||||||
data.mtlbufconstants = mtlbufconstants;
|
|
||||||
data.mtlbufconstants.label = @"SDL constant data";
|
|
||||||
|
|
||||||
char *constantdata = [mtlbufconstantstaging contents];
|
char *constantdata = [mtlbufconstantstaging contents];
|
||||||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
|
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
|
||||||
@ -1437,10 +1445,42 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||||||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
|
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
|
||||||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
|
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
|
||||||
|
|
||||||
|
int quadcount = UINT16_MAX / 4;
|
||||||
|
size_t indicessize = sizeof(UInt16) * quadcount * 6;
|
||||||
|
id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
|
||||||
|
#if !__has_feature(objc_arc)
|
||||||
|
[mtlbufquadindicesstaging autorelease];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Quads in the following vertex order (matches the FillRects vertices):
|
||||||
|
* 1---3
|
||||||
|
* | \ |
|
||||||
|
* 0---2
|
||||||
|
*/
|
||||||
|
UInt16 *indexdata = [mtlbufquadindicesstaging contents];
|
||||||
|
for (int i = 0; i < quadcount; i++) {
|
||||||
|
indexdata[i * 6 + 0] = i * 4 + 0;
|
||||||
|
indexdata[i * 6 + 1] = i * 4 + 1;
|
||||||
|
indexdata[i * 6 + 2] = i * 4 + 2;
|
||||||
|
|
||||||
|
indexdata[i * 6 + 3] = i * 4 + 2;
|
||||||
|
indexdata[i * 6 + 4] = i * 4 + 1;
|
||||||
|
indexdata[i * 6 + 5] = i * 4 + 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
|
||||||
|
data.mtlbufconstants = mtlbufconstants;
|
||||||
|
data.mtlbufconstants.label = @"SDL constant data";
|
||||||
|
|
||||||
|
id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
|
||||||
|
data.mtlbufquadindices = mtlbufquadindices;
|
||||||
|
data.mtlbufquadindices.label = @"SDL quad index buffer";
|
||||||
|
|
||||||
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
|
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
|
||||||
id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
|
id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
|
||||||
|
|
||||||
[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:data.mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
|
[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
|
||||||
|
[blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];
|
||||||
|
|
||||||
[blitcmd endEncoding];
|
[blitcmd endEncoding];
|
||||||
[cmdbuffer commit];
|
[cmdbuffer commit];
|
||||||
@ -1503,8 +1543,10 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef __IPHONE_11_0
|
#ifdef __IPHONE_11_0
|
||||||
|
if (@available(iOS 11.0, *)) {
|
||||||
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
|
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
|
||||||
maxtexsize = 16384;
|
maxtexsize = 16384;
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
#ifdef __IPHONE_10_0
|
#ifdef __IPHONE_10_0
|
||||||
@ -1529,6 +1571,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||||||
[mtlsamplernearest release];
|
[mtlsamplernearest release];
|
||||||
[mtlsamplerlinear release];
|
[mtlsamplerlinear release];
|
||||||
[mtlbufconstants release];
|
[mtlbufconstants release];
|
||||||
|
[mtlbufquadindices release];
|
||||||
[view release];
|
[view release];
|
||||||
[data release];
|
[data release];
|
||||||
[mtldevice release];
|
[mtldevice release];
|
||||||
|
Loading…
Reference in New Issue
Block a user