From 2a2b256b95f10cfa84a3b9678a8afbf26d45b820 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Sun, 6 Oct 2024 21:25:38 +0300 Subject: [PATCH 01/19] Flush uniform buffer explicitly. --- src/refresh/debug.c | 1 + src/refresh/gl.h | 10 ++++++++++ src/refresh/main.c | 4 ++++ src/refresh/mesh.c | 2 ++ src/refresh/shader.c | 9 ++++----- src/refresh/tess.c | 3 +++ 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/refresh/debug.c b/src/refresh/debug.c index e7a7338e2..4d7f2ffc7 100644 --- a/src/refresh/debug.c +++ b/src/refresh/debug.c @@ -417,6 +417,7 @@ static void GL_DrawDebugLines(void) return; GL_LoadMatrix(glr.viewmatrix); + GL_LoadUniforms(); GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); GL_BindArrays(VA_NULLMODEL); GL_ArrayBits(GLA_VERTEX | GLA_COLOR); diff --git a/src/refresh/gl.h b/src/refresh/gl.h index b9fcd5b02..d7f374fef 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -556,6 +556,7 @@ typedef struct { GLfloat view_matrix[16]; GLfloat proj_matrix[16]; glUniformBlock_t u_block; + bool u_block_dirty; } glState_t; extern glState_t gls; @@ -577,6 +578,7 @@ typedef struct { void (*setup_3d)(void); void (*load_matrix)(GLenum mode, const GLfloat *matrix); + void (*load_uniforms)(void); void (*state_bits)(glStateBits_t bits); void (*array_bits)(glArrayBits_t bits); @@ -635,6 +637,14 @@ static inline void GL_LoadMatrix(const GLfloat *matrix) } } +static inline void GL_LoadUniforms(void) +{ + if (gls.u_block_dirty && gl_backend->load_uniforms) { + gl_backend->load_uniforms(); + gls.u_block_dirty = false; + } +} + static inline void GL_BindBuffer(GLenum target, GLuint buffer) { const int i = target == GL_ELEMENT_ARRAY_BUFFER; diff --git a/src/refresh/main.c b/src/refresh/main.c index 804f74025..514b432c5 100644 --- a/src/refresh/main.c +++ b/src/refresh/main.c @@ -345,6 +345,7 @@ static void GL_DrawSpriteModel(const model_t *model) } GL_LoadMatrix(glr.viewmatrix); + GL_LoadUniforms(); GL_BindTexture(TMU_TEXTURE, image->texnum); 
GL_BindArrays(VA_SPRITE); GL_StateBits(bits); @@ -393,6 +394,7 @@ static void GL_DrawNullModel(void) WN32(tess.vertices + 23, U32_BLUE); GL_LoadMatrix(glr.viewmatrix); + GL_LoadUniforms(); GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); GL_BindArrays(VA_NULLMODEL); GL_StateBits(GLS_DEFAULT); @@ -462,6 +464,7 @@ static void GL_OccludeFlares(void) if (!set) { GL_LoadMatrix(glr.viewmatrix); + GL_LoadUniforms(); GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); GL_BindArrays(VA_OCCLUDE); GL_StateBits(GLS_DEPTHMASK_FALSE); @@ -630,6 +633,7 @@ static void GL_WaterWarp(void) GL_StateBits(GLS_DEPTHTEST_DISABLE | GLS_DEPTHMASK_FALSE | GLS_CULL_DISABLE | GLS_TEXTURE_REPLACE | GLS_WARP_ENABLE); GL_ArrayBits(GLA_VERTEX | GLA_TC); + GL_LoadUniforms(); x0 = glr.fd.x; x1 = glr.fd.x + glr.fd.width; diff --git a/src/refresh/mesh.c b/src/refresh/mesh.c index a30231513..38a3ad02f 100644 --- a/src/refresh/mesh.c +++ b/src/refresh/mesh.c @@ -538,6 +538,7 @@ static void draw_shadow(const glIndex_t *indices, int num_indices) // load shadow projection matrix GL_LoadMatrix(shadowmatrix); + GL_LoadUniforms(); // eliminate z-fighting by utilizing stencil buffer, if available if (gl_config.stencilbits) { @@ -606,6 +607,7 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, // fall back to entity matrix GL_LoadMatrix(glr.entmatrix); + GL_LoadUniforms(); // avoid drawing hidden faces for transparent gun by pre-filling depth buffer // muzzle flashes are excluded by checking for RF_FULLBRIGHT bit diff --git a/src/refresh/shader.c b/src/refresh/shader.c index c06060cd9..a931d2f76 100644 --- a/src/refresh/shader.c +++ b/src/refresh/shader.c @@ -24,8 +24,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define GLSL(x) SZ_Write(buf, CONST_STR_LEN(#x "\n")); #define GLSF(x) SZ_Write(buf, CONST_STR_LEN(x)) -static void upload_u_block(void); - static void write_header(sizebuf_t *buf) { if (gl_config.ver_es) { @@ -344,7 +342,7 @@ static void 
shader_state_bits(glStateBits_t bits) if (diff & GLS_SCROLL_MASK && bits & GLS_SCROLL_ENABLE) { GL_ScrollPos(gls.u_block.scroll, bits); - upload_u_block(); + gls.u_block_dirty = true; } } @@ -385,7 +383,7 @@ static void shader_color(GLfloat r, GLfloat g, GLfloat b, GLfloat a) qglVertexAttrib4f(VERT_ATTR_COLOR, r, g, b, a); } -static void upload_u_block(void) +static void shader_load_uniforms(void) { qglBufferData(GL_UNIFORM_BUFFER, sizeof(gls.u_block), &gls.u_block, GL_DYNAMIC_DRAW); c.uniformUploads++; @@ -405,7 +403,7 @@ static void shader_load_matrix(GLenum mode, const GLfloat *matrix) } GL_MultMatrix(gls.u_block.mvp, gls.proj_matrix, gls.view_matrix); - upload_u_block(); + gls.u_block_dirty = true; } static void shader_setup_2d(void) @@ -510,6 +508,7 @@ const glbackend_t backend_shader = { .setup_3d = shader_setup_3d, .load_matrix = shader_load_matrix, + .load_uniforms = shader_load_uniforms, .state_bits = shader_state_bits, .array_bits = shader_array_bits, diff --git a/src/refresh/tess.c b/src/refresh/tess.c index 3d775dc67..79f67da5b 100644 --- a/src/refresh/tess.c +++ b/src/refresh/tess.c @@ -73,6 +73,7 @@ void GL_DrawParticles(void) return; GL_LoadMatrix(glr.viewmatrix); + GL_LoadUniforms(); GL_BindArrays(VA_EFFECT); bits = (gl_partstyle->integer ? GLS_BLEND_ADD : GLS_BLEND_BLEND) | GLS_DEPTHMASK_FALSE; @@ -591,6 +592,8 @@ void GL_DrawIndexed(showtris_t showtris) { const glIndex_t *indices = tess.indices; + GL_LoadUniforms(); + GL_LockArrays(tess.numverts); if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) { From efd8da1ee48f7b7f43ead83189c8ab5eaec930d1 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 00:29:59 +0300 Subject: [PATCH 02/19] Add GL_DeleteBuffer() function. 
--- src/refresh/gl.h | 1 + src/refresh/models.c | 8 +------- src/refresh/state.c | 14 ++++++++++++++ src/refresh/surf.c | 10 ++-------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/refresh/gl.h b/src/refresh/gl.h index d7f374fef..07700bbbc 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -689,6 +689,7 @@ void GL_ForceTexture(glTmu_t tmu, GLuint texnum); void GL_BindTexture(glTmu_t tmu, GLuint texnum); void GL_ForceCubemap(GLuint texnum); void GL_BindCubemap(GLuint texnum); +void GL_DeleteBuffer(GLuint buffer); void GL_CommonStateBits(glStateBits_t bits); void GL_ScrollPos(vec2_t scroll, glStateBits_t bits); void GL_DrawOutlines(GLsizei count, const glIndex_t *indices, bool indexed); diff --git a/src/refresh/models.c b/src/refresh/models.c index b93b22693..9a6b68ccc 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -107,13 +107,7 @@ static void MOD_Free(model_t *model) Hunk_Free(&model->skeleton_hunk); #endif - if (model->buffer) { - // invalidate bindings - for (int i = 0; i < 2; i++) - if (gls.currentbuffer[i] == model->buffer) - gls.currentbuffer[i] = 0; - qglDeleteBuffers(1, &model->buffer); - } + GL_DeleteBuffer(model->buffer); memset(model, 0, sizeof(*model)); } diff --git a/src/refresh/state.c b/src/refresh/state.c index 2684477a2..17d08f5b2 100644 --- a/src/refresh/state.c +++ b/src/refresh/state.c @@ -92,6 +92,20 @@ void GL_BindCubemap(GLuint texnum) c.texSwitches++; } +void GL_DeleteBuffer(GLuint buffer) +{ + if (!buffer) + return; + + Q_assert(qglDeleteBuffers); + qglDeleteBuffers(1, &buffer); + + // invalidate bindings + for (int i = 0; i < q_countof(gls.currentbuffer); i++) + if (gls.currentbuffer[i] == buffer) + gls.currentbuffer[i] = 0; +} + void GL_CommonStateBits(glStateBits_t bits) { glStateBits_t diff = bits ^ gls.state_bits; diff --git a/src/refresh/surf.c b/src/refresh/surf.c index 10b06a463..b1ed6e167 100644 --- a/src/refresh/surf.c +++ b/src/refresh/surf.c @@ -986,17 +986,11 @@ void 
GL_FreeWorld(void) return; BSP_Free(gl_static.world.cache); + Z_Free(gl_static.world.vertices); + GL_DeleteBuffer(gl_static.world.buffer); - if (gl_static.world.vertices) - Z_Free(gl_static.world.vertices); - else if (qglDeleteBuffers) - qglDeleteBuffers(1, &gl_static.world.buffer); - - // invalidate bindings if (gls.currentva == VA_3D) gls.currentva = VA_NONE; - if (gls.currentbuffer[0] == gl_static.world.buffer) - gls.currentbuffer[0] = 0; memset(&gl_static.world, 0, sizeof(gl_static.world)); } From 44ba29ee56243e451a342f37eaae00cc72971535 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 00:51:19 +0300 Subject: [PATCH 03/19] Add R_Malloc() macro. --- src/refresh/gl.h | 3 +++ src/refresh/surf.c | 2 +- src/refresh/texture.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/refresh/gl.h b/src/refresh/gl.h index 07700bbbc..db97ea704 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -39,6 +39,9 @@ with this program; if not, write to the Free Software Foundation, Inc., * */ +#define R_Malloc(size) Z_TagMalloc(size, TAG_RENDERER) +#define R_Mallocz(size) Z_TagMallocz(size, TAG_RENDERER) + #if USE_GLES #define QGL_INDEX_TYPE GL_UNSIGNED_SHORT typedef GLushort glIndex_t; diff --git a/src/refresh/surf.c b/src/refresh/surf.c index b1ed6e167..bf832f990 100644 --- a/src/refresh/surf.c +++ b/src/refresh/surf.c @@ -1091,7 +1091,7 @@ void GL_LoadWorld(const char *name) if (create_surface_vbo(size)) { Com_DPrintf("%s: %zu bytes of vertex data as VBO\n", __func__, size); } else { - gl_static.world.vertices = Z_TagMalloc(size, TAG_RENDERER); + gl_static.world.vertices = R_Malloc(size); Com_DPrintf("%s: %zu bytes of vertex data on heap\n", __func__, size); } gl_static.world.buffer_size = size; diff --git a/src/refresh/texture.c b/src/refresh/texture.c index 0e9e8771a..58b8824cc 100644 --- a/src/refresh/texture.c +++ b/src/refresh/texture.c @@ -918,7 +918,7 @@ int IMG_ReadPixels(screenshot_t *s) s->bpp = bpp; s->rowbytes = rowbytes; - 
s->pixels = Z_TagMalloc(buf_size, TAG_RENDERER); + s->pixels = R_Malloc(buf_size); s->width = r_config.width; s->height = r_config.height; From 8a2080e5e5bca527d02c8462033ce0e3724d85ee Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 00:53:40 +0300 Subject: [PATCH 04/19] Prepare hunk API for GPU lerp. Add Hunk_FreeToWatermark(). Make Hunk_*Alloc() accept alignment parameter. --- inc/system/hunk.h | 5 +++-- src/common/bsp.c | 12 +++++++----- src/refresh/models.c | 6 +++--- src/unix/hunk.c | 16 +++++++++++----- src/windows/hunk.c | 24 +++++++++++++++++++----- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/inc/system/hunk.h b/inc/system/hunk.h index cc15ef473..233456bf5 100644 --- a/inc/system/hunk.h +++ b/inc/system/hunk.h @@ -27,7 +27,8 @@ typedef struct { void Hunk_Init(void); void Hunk_Begin(memhunk_t *hunk, size_t maxsize); -void *Hunk_TryAlloc(memhunk_t *hunk, size_t size); -void *Hunk_Alloc(memhunk_t *hunk, size_t size); +void *Hunk_TryAlloc(memhunk_t *hunk, size_t size, size_t align); +void *Hunk_Alloc(memhunk_t *hunk, size_t size, size_t align); +void Hunk_FreeToWatermark(memhunk_t *hunk, size_t size); void Hunk_End(memhunk_t *hunk); void Hunk_Free(memhunk_t *hunk); diff --git a/src/common/bsp.c b/src/common/bsp.c index 51ebe84ed..3bc3a688f 100644 --- a/src/common/bsp.c +++ b/src/common/bsp.c @@ -45,8 +45,10 @@ static cvar_t *map_visibility_patch; =============================================================================== */ +#define BSP_ALIGN 64 + #define BSP_ALLOC(size) \ - Hunk_Alloc(&bsp->hunk, size) + Hunk_Alloc(&bsp->hunk, size, BSP_ALIGN) #define BSP_ERROR(msg) \ Com_SetLastError(va("%s: %s", __func__, msg)) @@ -544,9 +546,9 @@ static size_t BSP_ParseLightgridHeader(bsp_t *bsp, const byte *in, size_t filele } return - Q_ALIGN(sizeof(grid->nodes[0]) * grid->numnodes, 64) + - Q_ALIGN(sizeof(grid->leafs[0]) * grid->numleafs, 64) + - Q_ALIGN(sizeof(grid->samples[0]) * grid->numsamples * grid->numstyles, 64); + 
Q_ALIGN(sizeof(grid->nodes[0]) * grid->numnodes, BSP_ALIGN) + + Q_ALIGN(sizeof(grid->leafs[0]) * grid->numleafs, BSP_ALIGN) + + Q_ALIGN(sizeof(grid->samples[0]) * grid->numsamples * grid->numstyles, BSP_ALIGN); } static bool BSP_ValidateLightgrid_r(const lightgrid_t *grid, uint32_t nodenum) @@ -812,7 +814,7 @@ int BSP_Load(const char *name, bsp_t **bsp_p) count++; // round to cacheline - memsize += Q_ALIGN(count * info->memsize, 64); + memsize += Q_ALIGN(count * info->memsize, BSP_ALIGN); maxpos = max(maxpos, ofs + len); } diff --git a/src/refresh/models.c b/src/refresh/models.c index 9a6b68ccc..5a216a9ab 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -24,7 +24,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #endif #include "format/sp2.h" -#define MOD_Malloc(size) Hunk_TryAlloc(&model->hunk, size) +#define MOD_Malloc(size) Hunk_TryAlloc(&model->hunk, size, 64) #define OOM_CHECK(x) do { if (!(x)) { ret = Q_ERR(ENOMEM); goto fail; } } while (0) #define ENSURE(x, e) if (!(x)) return e @@ -714,7 +714,7 @@ static void MD5_ParseError(const char *text) static void *MD5_Malloc(model_t *model, size_t size) { - void *ptr = Hunk_TryAlloc(&model->skeleton_hunk, size); + void *ptr = Hunk_TryAlloc(&model->skeleton_hunk, size, 64); if (!ptr) { Com_SetLastError("Out of memory"); longjmp(md5_jmpbuf, -1); @@ -1302,7 +1302,7 @@ static bool MD5_LoadSkins(model_t *model) const maliasmesh_t *mesh = &model->meshes[0]; mdl->num_skins = mesh->numskins; - mdl->skins = Hunk_TryAlloc(&model->skeleton_hunk, sizeof(mdl->skins[0]) * mdl->num_skins); + mdl->skins = Hunk_TryAlloc(&model->skeleton_hunk, sizeof(mdl->skins[0]) * mdl->num_skins, 64); if (!mdl->skins) { Com_EPrintf("Out of memory for MD5 skins\n"); return false; diff --git a/src/unix/hunk.c b/src/unix/hunk.c index 4bc763ed7..7375df744 100644 --- a/src/unix/hunk.c +++ b/src/unix/hunk.c @@ -48,15 +48,15 @@ void Hunk_Begin(memhunk_t *hunk, size_t maxsize) hunk->mapped = hunk->maxsize; } -void 
*Hunk_TryAlloc(memhunk_t *hunk, size_t size) +void *Hunk_TryAlloc(memhunk_t *hunk, size_t size, size_t align) { void *buf; - Q_assert(size <= SIZE_MAX - 63); + Q_assert(size <= SIZE_MAX - (align - 1)); Q_assert(hunk->cursize <= hunk->maxsize); // round to cacheline - size = Q_ALIGN(size, 64); + size = Q_ALIGN(size, align); if (size > hunk->maxsize - hunk->cursize) return NULL; @@ -65,14 +65,20 @@ void *Hunk_TryAlloc(memhunk_t *hunk, size_t size) return buf; } -void *Hunk_Alloc(memhunk_t *hunk, size_t size) +void *Hunk_Alloc(memhunk_t *hunk, size_t size, size_t align) { - void *buf = Hunk_TryAlloc(hunk, size); + void *buf = Hunk_TryAlloc(hunk, size, align); if (!buf) Com_Error(ERR_FATAL, "%s: couldn't allocate %zu bytes", __func__, size); return buf; } +void Hunk_FreeToWatermark(memhunk_t *hunk, size_t size) +{ + Q_assert(size <= hunk->cursize); + hunk->cursize = size; +} + void Hunk_End(memhunk_t *hunk) { size_t newsize; diff --git a/src/windows/hunk.c b/src/windows/hunk.c index db438578d..cd9c00df1 100644 --- a/src/windows/hunk.c +++ b/src/windows/hunk.c @@ -45,15 +45,15 @@ void Hunk_Begin(memhunk_t *hunk, size_t maxsize) hunk->maxsize, GetLastError()); } -void *Hunk_TryAlloc(memhunk_t *hunk, size_t size) +void *Hunk_TryAlloc(memhunk_t *hunk, size_t size, size_t align) { void *buf; - Q_assert(size <= SIZE_MAX - 63); + Q_assert(size <= SIZE_MAX - (align - 1)); Q_assert(hunk->cursize <= hunk->maxsize); // round to cacheline - size = Q_ALIGN(size, 64); + size = Q_ALIGN(size, align); if (size > hunk->maxsize - hunk->cursize) return NULL; @@ -69,14 +69,28 @@ void *Hunk_TryAlloc(memhunk_t *hunk, size_t size) return (byte *)hunk->base + hunk->cursize - size; } -void *Hunk_Alloc(memhunk_t *hunk, size_t size) +void *Hunk_Alloc(memhunk_t *hunk, size_t size, size_t align) { - void *buf = Hunk_TryAlloc(hunk, size); + void *buf = Hunk_TryAlloc(hunk, size, align); if (!buf) Com_Error(ERR_FATAL, "%s: couldn't allocate %zu bytes", __func__, size); return buf; } +void 
Hunk_FreeToWatermark(memhunk_t *hunk, size_t size) +{ + Q_assert(size <= hunk->cursize); + + size_t newsize = Q_ALIGN(size, pagesize); + if (newsize < hunk->cursize) { + Q_assert(hunk->base); + Q_assert(newsize <= hunk->maxsize); + VirtualFree((byte *)hunk->base + newsize, hunk->maxsize - newsize, MEM_DECOMMIT); + } + + hunk->cursize = size; +} + void Hunk_End(memhunk_t *hunk) { Q_assert(hunk->cursize <= hunk->maxsize); From 223508c91aa9a426999d2aec03a775331356d882 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 00:55:17 +0300 Subject: [PATCH 05/19] Add GL_BindingForTarget(). --- src/refresh/gl.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/refresh/gl.h b/src/refresh/gl.h index db97ea704..a4f02eb15 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -532,6 +532,14 @@ typedef enum { MAX_TMUS } glTmu_t; +typedef enum { + GLB_VBO, + GLB_EBO, + GLB_UBO, + + GLB_COUNT +} glBufferBinding_t; + typedef struct { GLfloat mvp[16]; GLfloat msky[2][16]; @@ -553,7 +561,7 @@ typedef struct { GLuint texnumcube; glStateBits_t state_bits; glArrayBits_t array_bits; - GLuint currentbuffer[2]; + GLuint currentbuffer[GLB_COUNT]; glVertexArray_t currentva; const GLfloat *currentmatrix; GLfloat view_matrix[16]; @@ -648,11 +656,23 @@ static inline void GL_LoadUniforms(void) } } -static inline void GL_BindBuffer(GLenum target, GLuint buffer) +static inline glBufferBinding_t GL_BindingForTarget(GLenum target) { - const int i = target == GL_ELEMENT_ARRAY_BUFFER; - Q_assert(i || target == GL_ARRAY_BUFFER); + switch (target) { + case GL_ARRAY_BUFFER: + return GLB_VBO; + case GL_ELEMENT_ARRAY_BUFFER: + return GLB_EBO; + case GL_UNIFORM_BUFFER: + return GLB_UBO; + default: + q_unreachable(); + } +} +static inline void GL_BindBuffer(GLenum target, GLuint buffer) +{ + glBufferBinding_t i = GL_BindingForTarget(target); if (gls.currentbuffer[i] != buffer) { qglBindBuffer(target, buffer); gls.currentbuffer[i] = buffer; From 
1a170a8afce2dc12e0beb1022b696b250c6fcdc6 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 00:59:24 +0300 Subject: [PATCH 06/19] Allow binding VA_NONE. --- src/refresh/tess.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/refresh/tess.c b/src/refresh/tess.c index 79f67da5b..9124ac85d 100644 --- a/src/refresh/tess.c +++ b/src/refresh/tess.c @@ -543,22 +543,24 @@ static const glVaDesc_t arraydescs[VA_TOTAL][VERT_ATTR_COUNT] = { void GL_BindArrays(glVertexArray_t va) { - const GLfloat *ptr = tess.vertices; - GLuint buffer = 0; - if (gls.currentva == va) return; - if (va == VA_3D && !gl_static.world.vertices) { - buffer = gl_static.world.buffer; - ptr = NULL; - } else if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) { - buffer = gl_static.vertex_buffer; - ptr = NULL; - } + if (va != VA_NONE) { + const GLfloat *ptr = tess.vertices; + GLuint buffer = 0; + + if (va == VA_3D && !gl_static.world.vertices) { + buffer = gl_static.world.buffer; + ptr = NULL; + } else if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) { + buffer = gl_static.vertex_buffer; + ptr = NULL; + } - GL_BindBuffer(GL_ARRAY_BUFFER, buffer); - gl_backend->array_pointers(arraydescs[va], ptr); + GL_BindBuffer(GL_ARRAY_BUFFER, buffer); + gl_backend->array_pointers(arraydescs[va], ptr); + } gls.currentva = va; c.vertexArrayBinds++; @@ -566,6 +568,8 @@ void GL_BindArrays(glVertexArray_t va) void GL_LockArrays(GLsizei count) { + if (gls.currentva == VA_NONE) + return; if (gls.currentva == VA_3D && !gl_static.world.vertices) return; if (gl_config.caps & QGL_CAP_CLIENT_VA) { @@ -580,6 +584,8 @@ void GL_LockArrays(GLsizei count) void GL_UnlockArrays(void) { + if (gls.currentva == VA_NONE) + return; if (gls.currentva == VA_3D && !gl_static.world.vertices) return; if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) @@ -592,6 +598,8 @@ void GL_DrawIndexed(showtris_t showtris) { const glIndex_t *indices = tess.indices; + Q_assert(gls.currentva != VA_NONE); + 
GL_LoadUniforms(); GL_LockArrays(tess.numverts); From 0b2cd8eae907579610cdddfa69dfe6dcb55c3bd9 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 01:01:27 +0300 Subject: [PATCH 07/19] Reduce scope of variable. --- src/refresh/mesh.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/refresh/mesh.c b/src/refresh/mesh.c index 38a3ad02f..caaca3506 100644 --- a/src/refresh/mesh.c +++ b/src/refresh/mesh.c @@ -824,7 +824,6 @@ void GL_DrawAliasModel(const model_t *model) const entity_t *ent = glr.ent; glCullResult_t cull; void (*tessfunc)(const maliasmesh_t *); - int i; if (glr.fd.extended) { newframenum = ent->frame % model->numframes; @@ -911,9 +910,9 @@ void GL_DrawAliasModel(const model_t *model) draw_alias_skeleton(model->skeleton); else #endif - for (i = 0; i < model->nummeshes; i++) { + for (int i = 0; i < model->nummeshes; i++) { const maliasmesh_t *mesh = &model->meshes[i]; - (*tessfunc)(mesh); + tessfunc(mesh); draw_alias_mesh(mesh->indices, mesh->numindices, mesh->tcoords, mesh->numverts, mesh->skins, mesh->numskins); From 9c9818e137974237b10dbd9568de62a8f84d1e39 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 01:03:13 +0300 Subject: [PATCH 08/19] Move glBindBufferBase to GL 3.0. --- src/refresh/qgl.c | 2 +- src/refresh/qgl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/refresh/qgl.c b/src/refresh/qgl.c index 486a02bc8..590ecf370 100644 --- a/src/refresh/qgl.c +++ b/src/refresh/qgl.c @@ -290,6 +290,7 @@ static const glsection_t sections[] = { // ensure full hardware support, including mipmaps. 
.caps = QGL_CAP_TEXTURE_MAX_LEVEL | QGL_CAP_TEXTURE_NON_POWER_OF_TWO, .functions = (const glfunction_t []) { + QGL_FN(BindBufferBase), QGL_FN(BindVertexArray), QGL_FN(DeleteVertexArrays), QGL_FN(GenVertexArrays), @@ -306,7 +307,6 @@ static const glsection_t sections[] = { .ver_es = QGL_VER(3, 0), .caps = QGL_CAP_SHADER, .functions = (const glfunction_t []) { - QGL_FN(BindBufferBase), QGL_FN(GetActiveUniformBlockiv), QGL_FN(GetUniformBlockIndex), QGL_FN(UniformBlockBinding), diff --git a/src/refresh/qgl.h b/src/refresh/qgl.h index 6207ad5cc..d7bfb8325 100644 --- a/src/refresh/qgl.h +++ b/src/refresh/qgl.h @@ -138,6 +138,7 @@ QGLAPI void (APIENTRYP qglVertexAttrib4f)(GLuint index, GLfloat x, GLfloat y, GL QGLAPI void (APIENTRYP qglVertexAttribPointer)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); // GL 3.0 +QGLAPI void (APIENTRYP qglBindBufferBase)(GLenum target, GLuint index, GLuint buffer); QGLAPI void (APIENTRYP qglBindFramebuffer)(GLenum target, GLuint framebuffer); QGLAPI void (APIENTRYP qglBindRenderbuffer)(GLenum target, GLuint renderbuffer); QGLAPI void (APIENTRYP qglBindVertexArray)(GLuint array); @@ -156,7 +157,6 @@ QGLAPI const GLubyte *(APIENTRYP qglGetStringi)(GLenum name, GLuint index); QGLAPI void (APIENTRYP qglRenderbufferStorage)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); // GL 3.1 -QGLAPI void (APIENTRYP qglBindBufferBase)(GLenum target, GLuint index, GLuint buffer); QGLAPI void (APIENTRYP qglGetActiveUniformBlockiv)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); QGLAPI GLuint (APIENTRYP qglGetUniformBlockIndex)(GLuint program, const GLchar *uniformBlockName); QGLAPI void (APIENTRYP qglUniformBlockBinding)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); From 2a513c2b546132c1e84afe50fd5ec4c91160023f Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 01:11:07 +0300 Subject: [PATCH 09/19] Print freed models 
count. --- src/refresh/models.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/refresh/models.c b/src/refresh/models.c index 5a216a9ab..cadd697f6 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -115,7 +115,7 @@ static void MOD_Free(model_t *model) void MOD_FreeUnused(void) { model_t *model; - int i; + int i, count = 0; for (i = 0, model = r_models; i < r_numModels; i++, model++) { if (!model->type) @@ -131,18 +131,28 @@ void MOD_FreeUnused(void) } else { // don't need this model MOD_Free(model); + count++; } } + + if (count) + Com_DPrintf("%s: %i models freed\n", __func__, count); } void MOD_FreeAll(void) { model_t *model; - int i; + int i, count = 0; - for (i = 0, model = r_models; i < r_numModels; i++, model++) - if (model->type) + for (i = 0, model = r_models; i < r_numModels; i++, model++) { + if (model->type) { MOD_Free(model); + count++; + } + } + + if (count) + Com_DPrintf("%s: %i models freed\n", __func__, count); r_numModels = 0; } From 009f142ff132a8bbf5aa4dc22c05bd97c66e7928 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Sat, 28 Sep 2024 22:13:43 +0300 Subject: [PATCH 10/19] Support alias model interpolation on GPU. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Can work on ≥ GL 3.1, but only enable on ≥ GL 4.3 by default to avoid performance regressions. Depending on hardware capabilities, this can make MD5 rendering a lot (up to 10x) faster. 
--- doc/client.asciidoc | 9 ++ src/refresh/gl.h | 91 +++++++++++-- src/refresh/main.c | 25 ++++ src/refresh/mesh.c | 218 +++++++++++++++++++++++------- src/refresh/models.c | 314 ++++++++++++++++++++++++++----------------- src/refresh/qgl.c | 22 +++ src/refresh/qgl.h | 3 + src/refresh/shader.c | 312 +++++++++++++++++++++++++++++++++++++++--- src/refresh/state.c | 5 +- src/refresh/tess.c | 4 +- 10 files changed, 800 insertions(+), 203 deletions(-) diff --git a/doc/client.asciidoc b/doc/client.asciidoc index c31ae79aa..2e3dcf766 100644 --- a/doc/client.asciidoc +++ b/doc/client.asciidoc @@ -894,6 +894,15 @@ gl_md5_distance:: the viewer, otherwise use original model. Default value is 2048. Setting this to 0 disables distance LOD. +gl_gpulerp:: + Enables alias model interpolation on GPU for potential rendering + speedup. Default value is 1 (auto). If using OpenGL core profile, this + option is always enabled. If not using GLSL backend, this option is always + disabled. + - 0 — disabled + - 1 — auto (enabled on OpenGL 4.3 and higher) + - 2 — force enabled + gl_glowmap_intensity:: Intensity factor for entity glowmaps. Default value is 0.75. 
diff --git a/src/refresh/gl.h b/src/refresh/gl.h index a4f02eb15..41fca4841 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -68,6 +68,7 @@ typedef struct { bool use_shaders; bool use_cubemaps; bool use_bmodel_skies; + bool use_gpu_lerp; struct { bsp_t *cache; vec_t *vertices; @@ -79,6 +80,10 @@ typedef struct { GLuint warp_renderbuffer; GLuint warp_framebuffer; GLuint uniform_buffer; +#if USE_MD5 + GLuint skeleton_buffer; + GLuint skeleton_tex[2]; +#endif GLuint array_object; GLuint index_buffer; GLuint vertex_buffer; @@ -91,6 +96,8 @@ typedef struct { uint32_t inverse_intensity_66; uint32_t inverse_intensity_100; int nolm_mask; + int hunk_align; + int hunk_maxsize; float sintab[256]; byte latlngtab[NUMVERTEXNORMALS][2]; byte lightstylemap[MAX_LIGHTSTYLES]; @@ -139,6 +146,9 @@ typedef enum { QGL_CAP_QUERY_RESULT_NO_WAIT = BIT(10), QGL_CAP_CLIENT_VA = BIT(11), QGL_CAP_LINE_SMOOTH = BIT(12), + QGL_CAP_BUFFER_TEXTURE = BIT(13), + QGL_CAP_SHADER_STORAGE = BIT(14), + QGL_CAP_SKELETON_MASK = QGL_CAP_BUFFER_TEXTURE | QGL_CAP_SHADER_STORAGE, } glcap_t; #define QGL_VER(major, minor) ((major) * 100 + (minor)) @@ -154,6 +164,7 @@ typedef struct { int stencilbits; int max_texture_size_log2; int max_texture_size; + int ssbo_align; } glConfig_t; extern glStatic_t gl_static; @@ -279,6 +290,14 @@ static inline void GL_AdvanceValue(float *restrict val, float target, float spee * */ +#define MOD_MAXSIZE_GPU 0x1000000 + +#if (defined _WIN32) && !(defined _WIN64) +#define MOD_MAXSIZE_CPU 0x400000 +#else +#define MOD_MAXSIZE_CPU 0x800000 +#endif + typedef struct { float st[2]; } maliastc_t; @@ -392,10 +411,9 @@ typedef struct { int nummeshes; int numframes; - maliasmesh_t *meshes; // md2 / md3 + maliasmesh_t *meshes; // MD2 / MD3 #if USE_MD5 - md5_model_t *skeleton; // md5 - memhunk_t skeleton_hunk; // md5 + md5_model_t *skeleton; // MD5 #endif union { maliasframe_t *frames; @@ -477,18 +495,26 @@ typedef enum { GLS_DEFAULT_SKY = BIT(14), GLS_DEFAULT_FLARE = BIT(15), - 
GLS_SHADE_SMOOTH = BIT(16), - GLS_SCROLL_X = BIT(17), - GLS_SCROLL_Y = BIT(18), - GLS_SCROLL_FLIP = BIT(19), - GLS_SCROLL_SLOW = BIT(20), + GLS_MESH_MD2 = BIT(16), + GLS_MESH_MD5 = BIT(17), + GLS_MESH_LERP = BIT(18), + GLS_MESH_SHELL = BIT(19), + GLS_MESH_SHADE = BIT(20), + + GLS_SHADE_SMOOTH = BIT(21), + GLS_SCROLL_X = BIT(22), + GLS_SCROLL_Y = BIT(23), + GLS_SCROLL_FLIP = BIT(24), + GLS_SCROLL_SLOW = BIT(25), GLS_BLEND_MASK = GLS_BLEND_BLEND | GLS_BLEND_ADD | GLS_BLEND_MODULATE, GLS_COMMON_MASK = GLS_DEPTHMASK_FALSE | GLS_DEPTHTEST_DISABLE | GLS_CULL_DISABLE | GLS_BLEND_MASK, GLS_SKY_MASK = GLS_CLASSIC_SKY | GLS_DEFAULT_SKY, + GLS_MESH_ANY = GLS_MESH_MD2 | GLS_MESH_MD5, + GLS_MESH_MASK = GLS_MESH_ANY | GLS_MESH_LERP | GLS_MESH_SHELL | GLS_MESH_SHADE, GLS_SHADER_MASK = GLS_ALPHATEST_ENABLE | GLS_TEXTURE_REPLACE | GLS_SCROLL_ENABLE | GLS_LIGHTMAP_ENABLE | GLS_WARP_ENABLE | GLS_INTENSITY_ENABLE | GLS_GLOWMAP_ENABLE | - GLS_SKY_MASK | GLS_DEFAULT_FLARE, + GLS_SKY_MASK | GLS_DEFAULT_FLARE | GLS_MESH_MASK, GLS_SCROLL_MASK = GLS_SCROLL_ENABLE | GLS_SCROLL_X | GLS_SCROLL_Y | GLS_SCROLL_FLIP | GLS_SCROLL_SLOW, } glStateBits_t; @@ -497,8 +523,16 @@ typedef enum { VERT_ATTR_TC, VERT_ATTR_LMTC, VERT_ATTR_COLOR, + VERT_ATTR_COUNT, - VERT_ATTR_COUNT + // MD2 + VERT_ATTR_MESH_TC = 0, + VERT_ATTR_MESH_NEW_POS = 1, + VERT_ATTR_MESH_OLD_POS = 2, + + // MD5 + VERT_ATTR_MESH_NORM = 1, + VERT_ATTR_MESH_VERT = 2, } glVertexAttr_t; typedef enum { @@ -507,6 +541,8 @@ typedef enum { GLA_TC = BIT(VERT_ATTR_TC), GLA_LMTC = BIT(VERT_ATTR_LMTC), GLA_COLOR = BIT(VERT_ATTR_COLOR), + GLA_MESH_STATIC = MASK(2), + GLA_MESH_LERP = MASK(3), } glArrayBits_t; typedef enum { @@ -529,7 +565,11 @@ typedef enum { TMU_TEXTURE, TMU_LIGHTMAP, TMU_GLOWMAP, - MAX_TMUS + MAX_TMUS, + + // MD5 + TMU_SKEL_WEIGHTS, + TMU_SKEL_JOINTNUMS, } glTmu_t; typedef enum { @@ -540,15 +580,38 @@ typedef enum { GLB_COUNT } glBufferBinding_t; +enum { UBO_UNIFORMS, UBO_SKELETON }; +enum { SSBO_WEIGHTS, SSBO_JOINTNUMS }; + 
+typedef struct { + vec4_t oldscale; + vec4_t newscale; + vec4_t translate; + vec4_t shadedir; + vec4_t color; + vec4_t pad_0; + GLfloat pad_1; + GLfloat pad_2; + GLfloat pad_3; + GLuint weight_ofs; + GLuint jointnum_ofs; + GLfloat shellscale; + GLfloat backlerp; + GLfloat frontlerp; +} glMeshBlock_t; + typedef struct { GLfloat mvp[16]; - GLfloat msky[2][16]; + union { + GLfloat msky[2][16]; + glMeshBlock_t mesh; + }; GLfloat time; GLfloat modulate; GLfloat add; GLfloat intensity; GLfloat intensity2; - GLfloat pad_1; + GLfloat pad_4; GLfloat w_amp[2]; GLfloat w_phase[2]; GLfloat scroll[2]; @@ -572,6 +635,8 @@ typedef struct { extern glState_t gls; +#define VBO_OFS(n) ((void *)(n)) + typedef struct { uint8_t size; bool type; diff --git a/src/refresh/main.c b/src/refresh/main.c index 514b432c5..9f5318c5b 100644 --- a/src/refresh/main.c +++ b/src/refresh/main.c @@ -1013,6 +1013,31 @@ static void GL_SetupConfig(void) qglDebugMessageCallback(myDebugProc, NULL); } + if (gl_config.caps & QGL_CAP_SHADER_STORAGE) { + integer = 0; + qglGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &integer); + if (integer < 2) { + Com_DPrintf("Not enough shader storage blocks available\n"); + gl_config.caps &= ~QGL_CAP_SHADER_STORAGE; + } else { + integer = 1; + qglGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &integer); + if (integer & (integer - 1)) + integer = Q_npot32(integer); + Com_DPrintf("SSBO alignment: %d\n", integer); + gl_config.ssbo_align = integer; + } + } + + if (gl_config.caps & QGL_CAP_BUFFER_TEXTURE) { + integer = 0; + qglGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &integer); + if (integer < MOD_MAXSIZE_GPU) { + Com_DPrintf("Not enough buffer texture size available\n"); + gl_config.caps &= ~QGL_CAP_BUFFER_TEXTURE; + } + } + GL_ShowErrors(__func__); } diff --git a/src/refresh/mesh.c b/src/refresh/mesh.c index caaca3506..40c52bcd9 100644 --- a/src/refresh/mesh.c +++ b/src/refresh/mesh.c @@ -24,17 +24,18 @@ typedef enum { SHADOW_ONLY } drawshadow_t; -static unsigned 
oldframenum; -static unsigned newframenum; -static float frontlerp; -static float backlerp; -static vec3_t origin; -static vec3_t oldscale; -static vec3_t newscale; -static vec3_t translate; -static vec_t shellscale; -static vec4_t color; -static GLuint buffer; +static unsigned oldframenum; +static unsigned newframenum; +static float frontlerp; +static float backlerp; +static vec3_t origin; +static vec3_t oldscale; +static vec3_t newscale; +static vec3_t translate; +static vec_t shellscale; +static vec4_t color; +static glStateBits_t meshbits; +static GLuint buffer; static vec3_t shadedir; static bool dotshading; @@ -422,19 +423,32 @@ static void setup_celshading(void) celscale = 1.0f - Distance(origin, glr.fd.vieworg) / 700.0f; } +static void uniform_mesh_color(float r, float g, float b, float a) +{ + if (gls.currentva) { + GL_Color(r, g, b, a); + } else { + Vector4Set(gls.u_block.mesh.color, r, g, b, a); + gls.u_block_dirty = true; + } +} + static void draw_celshading(const glIndex_t *indices, int num_indices) { if (celscale < 0.01f) return; GL_BindTexture(TMU_TEXTURE, TEXNUM_BLACK); - GL_StateBits(GLS_BLEND_BLEND); - GL_ArrayBits(GLA_VERTEX); + GL_StateBits(GLS_BLEND_BLEND | (meshbits & ~GLS_MESH_SHADE)); + if (gls.currentva) + GL_ArrayBits(GLA_VERTEX); + + uniform_mesh_color(0, 0, 0, color[3] * celscale); + GL_LoadUniforms(); qglLineWidth(gl_celshading->value * celscale); qglPolygonMode(GL_FRONT_AND_BACK, GL_LINE); qglCullFace(GL_FRONT); - GL_Color(0, 0, 0, color[3] * celscale); GL_DrawTriangles(num_indices, indices); qglCullFace(GL_BACK); qglPolygonMode(GL_FRONT_AND_BACK, GL_FILL); @@ -538,7 +552,6 @@ static void draw_shadow(const glIndex_t *indices, int num_indices) // load shadow projection matrix GL_LoadMatrix(shadowmatrix); - GL_LoadUniforms(); // eliminate z-fighting by utilizing stencil buffer, if available if (gl_config.stencilbits) { @@ -547,13 +560,16 @@ static void draw_shadow(const glIndex_t *indices, int num_indices) qglStencilOp(GL_KEEP, GL_KEEP, 
GL_INCR); } - GL_StateBits(GLS_BLEND_BLEND); GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); - GL_ArrayBits(GLA_VERTEX); + GL_StateBits(GLS_BLEND_BLEND | (meshbits & ~GLS_MESH_SHADE)); + if (gls.currentva) + GL_ArrayBits(GLA_VERTEX); + + uniform_mesh_color(0, 0, 0, color[3] * 0.5f); + GL_LoadUniforms(); qglEnable(GL_POLYGON_OFFSET_FILL); qglPolygonOffset(-1.0f, -2.0f); - GL_Color(0, 0, 0, color[3] * 0.5f); GL_DrawTriangles(num_indices, indices); qglDisable(GL_POLYGON_OFFSET_FILL); @@ -590,6 +606,23 @@ static const image_t *skin_for_mesh(image_t **skins, int num_skins) return skins[ent->skinnum]; } +static void bind_alias_arrays(const maliasmesh_t *mesh) +{ + uintptr_t base = (uintptr_t)mesh->verts; + uintptr_t old_ofs = base + oldframenum * mesh->numverts * sizeof(mesh->verts[0]); + uintptr_t new_ofs = base + newframenum * mesh->numverts * sizeof(mesh->verts[0]); + + qglVertexAttribPointer(VERT_ATTR_MESH_TC, 2, GL_FLOAT, GL_FALSE, 0, mesh->tcoords); + qglVertexAttribIPointer(VERT_ATTR_MESH_NEW_POS, 4, GL_SHORT, 0, VBO_OFS(new_ofs)); + + if (oldframenum == newframenum) { + GL_ArrayBits(GLA_MESH_STATIC); + } else { + qglVertexAttribIPointer(VERT_ATTR_MESH_OLD_POS, 4, GL_SHORT, 0, VBO_OFS(old_ofs)); + GL_ArrayBits(GLA_MESH_LERP); + } +} + static void draw_alias_mesh(const glIndex_t *indices, int num_indices, const maliastc_t *tcoords, int num_verts, image_t **skins, int num_skins) @@ -607,13 +640,19 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, // fall back to entity matrix GL_LoadMatrix(glr.entmatrix); + + uniform_mesh_color(color[0], color[1], color[2], color[3]); GL_LoadUniforms(); // avoid drawing hidden faces for transparent gun by pre-filling depth buffer // muzzle flashes are excluded by checking for RF_FULLBRIGHT bit if ((glr.ent->flags & (RF_TRANSLUCENT | RF_WEAPONMODEL | RF_FULLBRIGHT)) == (RF_TRANSLUCENT | RF_WEAPONMODEL)) { - GL_StateBits(GLS_DEFAULT); - GL_ArrayBits(GLA_VERTEX); + if (gls.currentva) { + GL_StateBits(GLS_DEFAULT); + 
GL_ArrayBits(GLA_VERTEX); + } else { + GL_StateBits(meshbits & ~GLS_MESH_SHADE); + } GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); qglColorMask(0, 0, 0, 0); @@ -625,7 +664,9 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, } state = GLS_INTENSITY_ENABLE; - if (dotshading) + if (!gls.currentva) + state |= meshbits; + else if (dotshading) state |= GLS_SHADE_SMOOTH; if (glr.ent->flags & RF_TRANSLUCENT) @@ -642,16 +683,14 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, if (skin->texnum2) GL_BindTexture(TMU_GLOWMAP, skin->texnum2); - if (dotshading) { - GL_ArrayBits(GLA_VERTEX | GLA_TC | GLA_COLOR); - } else { - GL_ArrayBits(GLA_VERTEX | GLA_TC); - GL_Color(color[0], color[1], color[2], color[3]); + if (gls.currentva) { + if (dotshading) + GL_ArrayBits(GLA_VERTEX | GLA_TC | GLA_COLOR); + else + GL_ArrayBits(GLA_VERTEX | GLA_TC); + gl_backend->tex_coord_pointer((const GLfloat *)tcoords); } - GL_BindBuffer(GL_ARRAY_BUFFER, buffer); - gl_backend->tex_coord_pointer((const GLfloat *)tcoords); - GL_LockArrays(num_verts); GL_DrawTriangles(num_indices, indices); @@ -758,9 +797,41 @@ static void lerp_alias_skeleton(const md5_model_t *model) #pragma GCC reset_options #endif +static void bind_skel_arrays(const md5_mesh_t *mesh, const md5_joint_t *skel) +{ + if (gl_config.caps & QGL_CAP_SHADER_STORAGE) { + qglBindBufferRange(GL_SHADER_STORAGE_BUFFER, SSBO_WEIGHTS, buffer, + (uintptr_t)mesh->weights, mesh->num_weights * sizeof(mesh->weights[0])); + qglBindBufferRange(GL_SHADER_STORAGE_BUFFER, SSBO_JOINTNUMS, buffer, + (uintptr_t)mesh->jointnums, Q_ALIGN(mesh->num_weights, sizeof(uint32_t))); + } else { + Q_assert(gl_config.caps & QGL_CAP_BUFFER_TEXTURE); + + gls.u_block.mesh.weight_ofs = (uintptr_t)mesh->weights / sizeof(mesh->weights[0]); + gls.u_block.mesh.jointnum_ofs = (uintptr_t)mesh->jointnums; + + GL_ActiveTexture(TMU_SKEL_WEIGHTS); + qglBindTexture(GL_TEXTURE_BUFFER, gl_static.skeleton_tex[0]); + qglTexBuffer(GL_TEXTURE_BUFFER, 
GL_RGBA32F, buffer); + + GL_ActiveTexture(TMU_SKEL_JOINTNUMS); + qglBindTexture(GL_TEXTURE_BUFFER, gl_static.skeleton_tex[1]); + qglTexBuffer(GL_TEXTURE_BUFFER, GL_R8UI, buffer); + } + + uintptr_t base = (uintptr_t)mesh->vertices; + qglVertexAttribPointer (VERT_ATTR_MESH_TC, 2, GL_FLOAT, GL_FALSE, 0, mesh->tcoords); + qglVertexAttribPointer (VERT_ATTR_MESH_NORM, 3, GL_FLOAT, GL_FALSE, sizeof(mesh->vertices[0]), VBO_OFS(base)); + qglVertexAttribIPointer(VERT_ATTR_MESH_VERT, 2, GL_UNSIGNED_SHORT, sizeof(mesh->vertices[0]), VBO_OFS(base + sizeof(vec3_t))); + + GL_ArrayBits(GLA_MESH_LERP); +} + static void draw_skeleton_mesh(const md5_model_t *model, const md5_mesh_t *mesh, const md5_joint_t *skel) { - if (glr.ent->flags & RF_SHELL_MASK) + if (buffer) + bind_skel_arrays(mesh, skel); + else if (glr.ent->flags & RF_SHELL_MASK) tess_shell_skel(mesh, skel); else if (dotshading) tess_shade_skel(mesh, skel); @@ -772,6 +843,11 @@ static void draw_skeleton_mesh(const md5_model_t *model, const md5_mesh_t *mesh, model->skins, model->num_skins); } +typedef struct { + vec4_t pos; + vec4_t axis[3]; +} glJoint_t; + static void draw_alias_skeleton(const md5_model_t *model) { const md5_joint_t *skel = temp_skeleton; @@ -781,6 +857,27 @@ static void draw_alias_skeleton(const md5_model_t *model) else lerp_alias_skeleton(model); + if (buffer) { + glJoint_t joints[MD5_MAX_JOINTS]; + + for (int i = 0; i < model->num_joints; i++) { + const md5_joint_t *in = &skel[i]; + glJoint_t *out = &joints[i]; + VectorCopy(in->pos, out->pos); + out->pos[3] = in->scale; + VectorCopy(in->axis[0], out->axis[0]); + VectorCopy(in->axis[1], out->axis[1]); + VectorCopy(in->axis[2], out->axis[2]); + } + + GL_BindBuffer(GL_UNIFORM_BUFFER, gl_static.skeleton_buffer); + qglBufferData(GL_UNIFORM_BUFFER, sizeof(joints), NULL, GL_STREAM_DRAW); + qglBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(joints[0]) * model->num_joints, joints); + + meshbits &= ~GLS_MESH_MD2; + meshbits |= GLS_MESH_MD5 | GLS_MESH_LERP; + } + for 
(int i = 0; i < model->num_meshes; i++) draw_skeleton_mesh(model, &model->meshes[i], skel); } @@ -875,27 +972,56 @@ void GL_DrawAliasModel(const model_t *model) // setup scale and translate vectors setup_frame_scale(model); - // select proper tessfunc - if (ent->flags & RF_SHELL_MASK) { - shellscale = (ent->flags & RF_WEAPONMODEL) ? - WEAPONSHELL_SCALE : POWERSUIT_SCALE; - tessfunc = newframenum == oldframenum ? - tess_static_shell : tess_lerped_shell; - } else if (dotshading) { - tessfunc = newframenum == oldframenum ? - tess_static_shade : tess_lerped_shade; + if (ent->flags & RF_SHELL_MASK) + shellscale = (ent->flags & RF_WEAPONMODEL) ? WEAPONSHELL_SCALE : POWERSUIT_SCALE; + + buffer = model->buffer; + GL_BindBuffer(GL_ARRAY_BUFFER, model->buffer); + GL_BindBuffer(GL_ELEMENT_ARRAY_BUFFER, model->buffer); + + if (gl_static.use_gpu_lerp) { + Q_assert(buffer); + + GL_BindArrays(VA_NONE); + tessfunc = bind_alias_arrays; + + meshbits = GLS_MESH_MD2; + if (oldframenum != newframenum) + meshbits |= GLS_MESH_LERP; + if (glr.ent->flags & RF_SHELL_MASK) + meshbits |= GLS_MESH_SHELL; + else if (dotshading) + meshbits |= GLS_MESH_SHADE; + + VectorCopy(oldscale, gls.u_block.mesh.oldscale); + VectorCopy(newscale, gls.u_block.mesh.newscale); + VectorCopy(translate, gls.u_block.mesh.translate); + VectorCopy(shadedir, gls.u_block.mesh.shadedir); + Vector4Copy(color, gls.u_block.mesh.color); + gls.u_block.mesh.shellscale = shellscale; + gls.u_block.mesh.backlerp = backlerp; + gls.u_block.mesh.frontlerp = frontlerp; } else { - tessfunc = newframenum == oldframenum ? - tess_static_plain : tess_lerped_plain; + Q_assert(!buffer); + + GL_BindArrays(dotshading ? VA_MESH_SHADE : VA_MESH_FLAT); + meshbits = 0; + + // select proper tessfunc + if (ent->flags & RF_SHELL_MASK) { + tessfunc = newframenum == oldframenum ? + tess_static_shell : tess_lerped_shell; + } else if (dotshading) { + tessfunc = newframenum == oldframenum ? 
+ tess_static_shade : tess_lerped_shade; + } else { + tessfunc = newframenum == oldframenum ? + tess_static_plain : tess_lerped_plain; + } } GL_RotateForEntity(false); - GL_BindArrays(dotshading ? VA_MESH_SHADE : VA_MESH_FLAT); - - buffer = model->buffer; - GL_BindBuffer(GL_ELEMENT_ARRAY_BUFFER, model->buffer); - if (ent->flags & RF_WEAPONMODEL) setup_weaponmodel(); diff --git a/src/refresh/models.c b/src/refresh/models.c index cadd697f6..bc812a8a4 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -24,9 +24,12 @@ with this program; if not, write to the Free Software Foundation, Inc., #endif #include "format/sp2.h" -#define MOD_Malloc(size) Hunk_TryAlloc(&model->hunk, size, 64) +#define MOD_GpuMalloc(size) \ + Hunk_TryAlloc(&model->hunk, size, gl_static.hunk_align) + +#define MOD_CpuMalloc(size) \ + (gl_static.use_gpu_lerp ? R_Mallocz(size) : MOD_GpuMalloc(size)) -#define OOM_CHECK(x) do { if (!(x)) { ret = Q_ERR(ENOMEM); goto fail; } } while (0) #define ENSURE(x, e) if (!(x)) return e // this used to be MAX_MODELS * 2, but not anymore. 
MAX_MODELS is 8192 now and @@ -86,10 +89,8 @@ static void MOD_List_f(void) size_t model_size = model->hunk.mapped; int flag = ' '; #if USE_MD5 - if (model->skeleton) { - model_size += model->skeleton_hunk.mapped; + if (model->skeleton) flag = '*'; - } #endif Com_Printf("%c%c %8zu : %s\n", types[model->type], flag, model_size, model->name); @@ -100,14 +101,50 @@ static void MOD_List_f(void) Com_Printf("Total resident: %zu\n", bytes); } -static void MOD_Free(model_t *model) +#if USE_MD5 +static void MD5_Free(md5_model_t *mdl); +#endif + +static void MOD_FreeAlias(model_t *model) { Hunk_Free(&model->hunk); + + GL_DeleteBuffer(model->buffer); + + // all memory is allocated on hunk if not using GPU lerp + if (!gl_static.use_gpu_lerp) + return; + #if USE_MD5 - Hunk_Free(&model->skeleton_hunk); + MD5_Free(model->skeleton); #endif - GL_DeleteBuffer(model->buffer); + for (int i = 0; i < model->nummeshes; i++) { + Z_Free(model->meshes[i].skins); +#if USE_MD5 + Z_Free(model->meshes[i].skinnames); +#endif + } + + Z_Free(model->meshes); + Z_Free(model->frames); +} + +static void MOD_Free(model_t *model) +{ + switch (model->type) { + case MOD_SPRITE: + Z_Free(model->spriteframes); + break; + case MOD_ALIAS: + MOD_FreeAlias(model); + break; + case MOD_EMPTY: + case MOD_FREE: + break; + default: + Q_assert(!"bad model type"); + } memset(model, 0, sizeof(*model)); } @@ -124,10 +161,6 @@ void MOD_FreeUnused(void) if (model->registration_sequence == r_registration_sequence) { // make sure it is paged in Com_PageInMemory(model->hunk.base, model->hunk.cursize); -#if USE_MD5 - if (model->skeleton_hunk.base) - Com_PageInMemory(model->skeleton_hunk.base, model->skeleton_hunk.cursize); -#endif } else { // don't need this model MOD_Free(model); @@ -198,10 +231,8 @@ static int MOD_LoadSP2(model_t *model, const void *rawdata, size_t length) return Q_ERR_INVALID_FORMAT; } - Hunk_Begin(&model->hunk, sizeof(model->spriteframes[0]) * header.numframes); model->type = MOD_SPRITE; - - 
model->spriteframes = MOD_Malloc(sizeof(model->spriteframes[0]) * header.numframes); + model->spriteframes = R_Malloc(sizeof(model->spriteframes[0]) * header.numframes); model->numframes = header.numframes; src_frame = (dsp2frame_t *)((byte *)rawdata + sizeof(dsp2header_t)); @@ -224,8 +255,6 @@ static int MOD_LoadSP2(model_t *model, const void *rawdata, size_t length) dst_frame++; } - Hunk_End(&model->hunk); - return Q_ERR_SUCCESS; } @@ -255,6 +284,25 @@ static const char *MOD_ValidateMD2(const dmd2header_t *header, size_t length) return NULL; } +static bool MOD_AllocMesh(model_t *model, maliasmesh_t *mesh) +{ + if (!(mesh->verts = MOD_GpuMalloc(sizeof(mesh->verts[0]) * mesh->numverts * model->numframes))) + return false; + if (!(mesh->tcoords = MOD_GpuMalloc(sizeof(mesh->tcoords[0]) * mesh->numverts))) + return false; + if (!(mesh->indices = MOD_GpuMalloc(sizeof(mesh->indices[0]) * mesh->numindices))) + return false; + if (!mesh->numskins) + return true; + if (!(mesh->skins = MOD_CpuMalloc(sizeof(mesh->skins[0]) * mesh->numskins))) + return false; +#if USE_MD5 + if (!(mesh->skinnames = MOD_CpuMalloc(sizeof(mesh->skinnames[0]) * mesh->numskins))) + return false; +#endif + return true; +} + static int MOD_LoadMD2(model_t *model, const void *rawdata, size_t length) { dmd2header_t header; @@ -267,7 +315,7 @@ static int MOD_LoadMD2(model_t *model, const void *rawdata, size_t length) maliasvert_t *dst_vert; maliastc_t *dst_tc; maliasmesh_t *mesh; - int i, j, k, val, ret; + int i, j, k, val; uint16_t remap[TESS_MAX_INDICES]; uint16_t vertIndices[TESS_MAX_INDICES]; uint16_t tcIndices[TESS_MAX_INDICES]; @@ -359,25 +407,22 @@ static int MOD_LoadMD2(model_t *model, const void *rawdata, size_t length) return Q_ERR_INVALID_FORMAT; } - Hunk_Begin(&model->hunk, 0x400000); + Hunk_Begin(&model->hunk, gl_static.hunk_maxsize); model->type = MOD_ALIAS; model->nummeshes = 1; model->numframes = header.num_frames; - OOM_CHECK(model->meshes = MOD_Malloc(sizeof(model->meshes[0]))); - 
OOM_CHECK(model->frames = MOD_Malloc(header.num_frames * sizeof(model->frames[0]))); + model->meshes = MOD_CpuMalloc(sizeof(model->meshes[0])); + model->frames = MOD_CpuMalloc(sizeof(model->frames[0]) * header.num_frames); + if (!model->meshes || !model->frames) + return Q_ERR(ENOMEM); mesh = model->meshes; mesh->numtris = numindices / 3; mesh->numindices = numindices; mesh->numverts = numverts; mesh->numskins = header.num_skins; - OOM_CHECK(mesh->verts = MOD_Malloc(numverts * header.num_frames * sizeof(mesh->verts[0]))); - OOM_CHECK(mesh->tcoords = MOD_Malloc(numverts * sizeof(mesh->tcoords[0]))); - OOM_CHECK(mesh->indices = MOD_Malloc(numindices * sizeof(mesh->indices[0]))); - OOM_CHECK(mesh->skins = MOD_Malloc(header.num_skins * sizeof(mesh->skins[0]))); -#if USE_MD5 - OOM_CHECK(mesh->skinnames = MOD_Malloc(header.num_skins * sizeof(mesh->skinnames[0]))); -#endif + if (!MOD_AllocMesh(model, mesh)) + return Q_ERR(ENOMEM); if (mesh->numtris != header.num_tris) Com_DPrintf("%s has %d bad triangles\n", model->name, header.num_tris - mesh->numtris); @@ -394,10 +439,8 @@ static int MOD_LoadMD2(model_t *model, const void *rawdata, size_t length) #else maliasskinname_t skinname; #endif - if (!Q_memccpy(skinname, src_skin, 0, sizeof(maliasskinname_t))) { - ret = Q_ERR_STRING_TRUNCATED; - goto fail; - } + if (!Q_memccpy(skinname, src_skin, 0, sizeof(maliasskinname_t))) + return Q_ERR_STRING_TRUNCATED; mesh->skins[i] = IMG_Find(skinname, IT_SKIN, IF_NONE); src_skin += MD2_MAX_SKINNAME; } @@ -466,12 +509,7 @@ static int MOD_LoadMD2(model_t *model, const void *rawdata, size_t length) dst_frame++; } - Hunk_End(&model->hunk); return Q_ERR_SUCCESS; - -fail: - Hunk_Free(&model->hunk); - return ret; } #if USE_MD3 @@ -510,7 +548,7 @@ static int MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, maliastc_t *dst_tc; glIndex_t *dst_idx; uint32_t index; - int i, j, k, ret; + int i, j, k; const char *err; if (length < sizeof(header)) @@ -530,13 +568,8 @@ static int 
MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, mesh->numindices = header.num_tris * 3; mesh->numverts = header.num_verts; mesh->numskins = header.num_skins; - OOM_CHECK(mesh->verts = MOD_Malloc(sizeof(mesh->verts[0]) * header.num_verts * model->numframes)); - OOM_CHECK(mesh->tcoords = MOD_Malloc(sizeof(mesh->tcoords[0]) * header.num_verts)); - OOM_CHECK(mesh->indices = MOD_Malloc(sizeof(mesh->indices[0]) * header.num_tris * 3)); - OOM_CHECK(mesh->skins = MOD_Malloc(sizeof(mesh->skins[0]) * header.num_skins)); -#if USE_MD5 - OOM_CHECK(mesh->skinnames = MOD_Malloc(sizeof(mesh->skinnames[0]) * header.num_skins)); -#endif + if (!MOD_AllocMesh(model, mesh)) + return Q_ERR(ENOMEM); // load all skins src_skin = (dmd3skin_t *)(rawdata + header.ofs_skins); @@ -598,9 +631,6 @@ static int MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, *offset_p = header.meshsize; return Q_ERR_SUCCESS; - -fail: - return ret; } static const char *MOD_ValidateMD3(const dmd3header_t *header, size_t length) @@ -645,12 +675,14 @@ static int MOD_LoadMD3(model_t *model, const void *rawdata, size_t length) return Q_ERR_INVALID_FORMAT; } - Hunk_Begin(&model->hunk, 0x400000); + Hunk_Begin(&model->hunk, gl_static.hunk_maxsize); model->type = MOD_ALIAS; model->numframes = header.num_frames; model->nummeshes = header.num_meshes; - OOM_CHECK(model->meshes = MOD_Malloc(sizeof(model->meshes[0]) * header.num_meshes)); - OOM_CHECK(model->frames = MOD_Malloc(sizeof(model->frames[0]) * header.num_frames)); + model->meshes = MOD_CpuMalloc(sizeof(model->meshes[0]) * header.num_meshes); + model->frames = MOD_CpuMalloc(sizeof(model->frames[0]) * header.num_frames); + if (!model->meshes || !model->frames) + return Q_ERR(ENOMEM); // load all frames src_frame = (dmd3frame_t *)((byte *)rawdata + header.ofs_frames); @@ -670,7 +702,7 @@ static int MOD_LoadMD3(model_t *model, const void *rawdata, size_t length) for (i = 0; i < header.num_meshes; i++) { ret = MOD_LoadMD3Mesh(model, &model->meshes[i], src_mesh, 
remaining, &offset); if (ret) - goto fail; + return ret; src_mesh += offset; remaining -= offset; } @@ -689,12 +721,7 @@ static int MOD_LoadMD3(model_t *model, const void *rawdata, size_t length) dst_frame++; } - Hunk_End(&model->hunk); return Q_ERR_SUCCESS; - -fail: - Hunk_Free(&model->hunk); - return ret; } #endif @@ -722,9 +749,9 @@ static void MD5_ParseError(const char *text) longjmp(md5_jmpbuf, -1); } -static void *MD5_Malloc(model_t *model, size_t size) +static void *MD5_GpuMalloc(model_t *model, size_t size) { - void *ptr = Hunk_TryAlloc(&model->skeleton_hunk, size, 64); + void *ptr = MOD_GpuMalloc(size); if (!ptr) { Com_SetLastError("Out of memory"); longjmp(md5_jmpbuf, -1); @@ -732,6 +759,11 @@ static void *MD5_Malloc(model_t *model, size_t size) return ptr; } +static void *MD5_CpuMalloc(model_t *model, size_t size) +{ + return gl_static.use_gpu_lerp ? R_Mallocz(size) : MD5_GpuMalloc(model, size); +} + static void MD5_ParseExpect(const char **buffer, const char *expect) { char *token = COM_Parse(buffer); @@ -893,10 +925,7 @@ static bool MD5_ParseMesh(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "MD5Version"); MD5_ParseExpect(&s, "10"); - // allocate data storage, now that we're definitely an MD5 - Hunk_Begin(&model->skeleton_hunk, 0x800000); - - model->skeleton = mdl = MD5_Malloc(model, sizeof(*mdl)); + model->skeleton = mdl = MD5_CpuMalloc(model, sizeof(*mdl)); MD5_ParseExpect(&s, "commandline"); COM_SkipToken(&s); @@ -927,7 +956,7 @@ static bool MD5_ParseMesh(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "}"); - mdl->meshes = MD5_Malloc(model, mdl->num_meshes * sizeof(mdl->meshes[0])); + mdl->meshes = MD5_CpuMalloc(model, mdl->num_meshes * sizeof(mdl->meshes[0])); for (i = 0; i < mdl->num_meshes; i++) { md5_mesh_t *mesh = &mdl->meshes[i]; @@ -939,8 +968,8 @@ static bool MD5_ParseMesh(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "numverts"); mesh->num_verts = MD5_ParseUint(&s, 0, 
TESS_MAX_VERTICES); - mesh->vertices = MD5_Malloc(model, mesh->num_verts * sizeof(mesh->vertices[0])); - mesh->tcoords = MD5_Malloc(model, mesh->num_verts * sizeof(mesh->tcoords [0])); + mesh->vertices = MD5_GpuMalloc(model, mesh->num_verts * sizeof(mesh->vertices[0])); + mesh->tcoords = MD5_GpuMalloc(model, mesh->num_verts * sizeof(mesh->tcoords [0])); for (j = 0; j < mesh->num_verts; j++) { MD5_ParseExpect(&s, "vert"); @@ -960,7 +989,7 @@ static bool MD5_ParseMesh(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "numtris"); uint32_t num_tris = MD5_ParseUint(&s, 0, TESS_MAX_INDICES / 3); - mesh->indices = MD5_Malloc(model, num_tris * 3 * sizeof(mesh->indices[0])); + mesh->indices = MD5_GpuMalloc(model, num_tris * 3 * sizeof(mesh->indices[0])); mesh->num_indices = num_tris * 3; for (j = 0; j < num_tris; j++) { @@ -972,8 +1001,8 @@ static bool MD5_ParseMesh(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "numweights"); mesh->num_weights = MD5_ParseUint(&s, 0, MD5_MAX_WEIGHTS); - mesh->weights = MD5_Malloc(model, mesh->num_weights * sizeof(mesh->weights [0])); - mesh->jointnums = MD5_Malloc(model, mesh->num_weights * sizeof(mesh->jointnums[0])); + mesh->weights = MD5_GpuMalloc(model, mesh->num_weights * sizeof(mesh->weights [0])); + mesh->jointnums = MD5_GpuMalloc(model, mesh->num_weights * sizeof(mesh->jointnums[0])); for (j = 0; j < mesh->num_weights; j++) { MD5_ParseExpect(&s, "weight"); @@ -1255,7 +1284,7 @@ static bool MD5_ParseAnim(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "}"); - mdl->skeleton_frames = MD5_Malloc(model, sizeof(mdl->skeleton_frames[0]) * mdl->num_frames * mdl->num_joints); + mdl->skeleton_frames = MD5_CpuMalloc(model, sizeof(mdl->skeleton_frames[0]) * mdl->num_frames * mdl->num_joints); // initialize scales for (i = 0; i < mdl->num_frames * mdl->num_joints; i++) @@ -1311,8 +1340,11 @@ static bool MD5_LoadSkins(model_t *model) md5_model_t *mdl = model->skeleton; const 
maliasmesh_t *mesh = &model->meshes[0]; + if (!mesh->numskins) + return true; + mdl->num_skins = mesh->numskins; - mdl->skins = Hunk_TryAlloc(&model->skeleton_hunk, sizeof(mdl->skins[0]) * mdl->num_skins, 64); + mdl->skins = MOD_CpuMalloc(sizeof(mdl->skins[0]) * mdl->num_skins); if (!mdl->skins) { Com_EPrintf("Out of memory for MD5 skins\n"); return false; @@ -1338,6 +1370,16 @@ static bool MD5_LoadSkins(model_t *model) return true; } +static void MD5_Free(md5_model_t *mdl) +{ + if (!mdl || !gl_static.use_gpu_lerp) + return; + Z_Free(mdl->meshes); + Z_Free(mdl->skeleton_frames); + Z_Free(mdl->skins); + Z_Free(mdl); +} + static void MOD_LoadMD5(model_t *model) { char model_name[MAX_QPATH], base_path[MAX_QPATH]; @@ -1353,6 +1395,8 @@ static void MOD_LoadMD5(model_t *model) if (!FS_FileExists(mesh_path) || !FS_FileExists(anim_path)) return; + size_t watermark = model->hunk.cursize; + if (!MD5_LoadFile(model, mesh_path, MD5_ParseMesh)) goto fail; if (!MD5_LoadFile(model, anim_path, MD5_ParseAnim)) @@ -1360,12 +1404,12 @@ static void MOD_LoadMD5(model_t *model) if (!MD5_LoadSkins(model)) goto fail; - Hunk_End(&model->skeleton_hunk); return; fail: + MD5_Free(model->skeleton); model->skeleton = NULL; - Hunk_Free(&model->skeleton_hunk); + Hunk_FreeToWatermark(&model->hunk, watermark); } #endif // USE_MD5 @@ -1401,68 +1445,46 @@ static void MOD_Reference(model_t *model) model->registration_sequence = r_registration_sequence; } -static void MOD_UploadBuffer(model_t *model) -{ - size_t verts_size = 0; - size_t index_size = 0; - int i; - - for (i = 0; i < model->nummeshes; i++) { - const maliasmesh_t *mesh = &model->meshes[i]; - verts_size += sizeof(mesh->tcoords[0]) * mesh->numverts; - index_size += sizeof(mesh->indices[0]) * mesh->numindices; - } - -#if USE_MD5 - const md5_model_t *skel = model->skeleton; - if (skel) { - for (i = 0; i < skel->num_meshes; i++) { - const md5_mesh_t *mesh = &skel->meshes[i]; - verts_size += sizeof(mesh->tcoords[0]) * mesh->num_verts; - 
index_size += sizeof(mesh->indices[0]) * mesh->num_indices; - } - } -#endif +#define FIXUP_OFFSET(ptr) ((ptr) = (void *)((uintptr_t)(ptr) - base)) +// upload hunk to GPU and free it +static bool MOD_UploadBuffer(model_t *model) +{ GL_ClearErrors(); - qglGenBuffers(1, &model->buffer); GL_BindBuffer(GL_ARRAY_BUFFER, model->buffer); - qglBufferData(GL_ARRAY_BUFFER, verts_size + index_size, NULL, GL_STATIC_DRAW); - Com_DDPrintf("%s: %zu bytes buffer\n", model->name, verts_size + index_size); + qglBufferData(GL_ARRAY_BUFFER, model->hunk.cursize, model->hunk.base, GL_STATIC_DRAW); + if (GL_ShowErrors(__func__)) + return false; - size_t verts_offset = 0; - size_t index_offset = verts_size; + const uintptr_t base = (uintptr_t)model->hunk.base; - for (i = 0; i < model->nummeshes; i++) { + for (int i = 0; i < model->nummeshes; i++) { maliasmesh_t *mesh = &model->meshes[i]; - verts_size = sizeof(mesh->tcoords[0]) * mesh->numverts; - index_size = sizeof(mesh->indices[0]) * mesh->numindices; - qglBufferSubData(GL_ARRAY_BUFFER, verts_offset, verts_size, mesh->tcoords); - qglBufferSubData(GL_ARRAY_BUFFER, index_offset, index_size, mesh->indices); - mesh->tcoords = (maliastc_t *)verts_offset; - mesh->indices = (glIndex_t *)index_offset; - verts_offset += verts_size; - index_offset += index_size; + FIXUP_OFFSET(mesh->verts); + FIXUP_OFFSET(mesh->tcoords); + FIXUP_OFFSET(mesh->indices); } #if USE_MD5 + const md5_model_t *skel = model->skeleton; if (skel) { - for (i = 0; i < skel->num_meshes; i++) { + for (int i = 0; i < skel->num_meshes; i++) { md5_mesh_t *mesh = &skel->meshes[i]; - verts_size = sizeof(mesh->tcoords[0]) * mesh->num_verts; - index_size = sizeof(mesh->indices[0]) * mesh->num_indices; - qglBufferSubData(GL_ARRAY_BUFFER, verts_offset, verts_size, mesh->tcoords); - qglBufferSubData(GL_ARRAY_BUFFER, index_offset, index_size, mesh->indices); - mesh->tcoords = (maliastc_t *)verts_offset; - mesh->indices = (glIndex_t *)index_offset; - verts_offset += verts_size; - 
index_offset += index_size; + FIXUP_OFFSET(mesh->vertices); + FIXUP_OFFSET(mesh->tcoords); + FIXUP_OFFSET(mesh->indices); + FIXUP_OFFSET(mesh->weights); + FIXUP_OFFSET(mesh->jointnums); } } #endif - GL_ShowErrors(__func__); + size_t mapped = model->hunk.mapped; + Hunk_Free(&model->hunk); + model->hunk.mapped = mapped; // for statistics + + return true; } qhandle_t R_RegisterModel(const char *name) @@ -1554,7 +1576,7 @@ qhandle_t R_RegisterModel(const char *name) FS_FreeFile(rawdata); if (ret < 0) { - memset(model, 0, sizeof(*model)); + MOD_Free(model); goto fail1; } @@ -1565,8 +1587,13 @@ qhandle_t R_RegisterModel(const char *name) MOD_LoadMD5(model); #endif - if (model->type == MOD_ALIAS && !(gl_config.caps & QGL_CAP_CLIENT_VA)) - MOD_UploadBuffer(model); + Hunk_End(&model->hunk); + + if (model->type == MOD_ALIAS && gl_static.use_gpu_lerp && !MOD_UploadBuffer(model)) { + MOD_Free(model); + ret = Q_ERR_LIBRARY_ERROR; + goto fail1; + } done: index = (model - r_models) + 1; @@ -1597,6 +1624,49 @@ model_t *MOD_ForHandle(qhandle_t h) void MOD_Init(void) { Q_assert(!r_numModels); + + // set defaults + gl_static.use_gpu_lerp = false; + gl_static.hunk_align = 64; + gl_static.hunk_maxsize = MOD_MAXSIZE_CPU; + + cvar_t *gl_gpulerp = Cvar_Get("gl_gpulerp", "1", 0); + gl_gpulerp->flags &= ~CVAR_FILES; + + if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) { + // MUST use GPU lerp if using core profile + Q_assert(gl_static.use_shaders); + gl_static.use_gpu_lerp = true; + } else if (gl_static.use_shaders) { + // restrict `auto' to GL 4.3 and higher + int minval = 1 + !(gl_config.caps & QGL_CAP_SHADER_STORAGE); + gl_static.use_gpu_lerp = gl_gpulerp->integer >= minval; + gl_gpulerp->flags |= CVAR_FILES; + } + + // can reserve more space if using GPU lerp + if (gl_static.use_gpu_lerp) + gl_static.hunk_maxsize = MOD_MAXSIZE_GPU; + +#if USE_MD5 + // prefer shader storage, but support buffer textures as fallback. + // if neither are supported and GPU lerp is enabled, disable MD5. 
+ if (gl_static.use_gpu_lerp && gl_md5_load->integer) { + if (gl_config.caps & QGL_CAP_SHADER_STORAGE) { + gl_static.hunk_align = max(16, gl_config.ssbo_align); + } else if (!(gl_config.caps & QGL_CAP_BUFFER_TEXTURE)) { + Com_WPrintf("Animating MD5 models on GPU is not supported " + "on this system. MD5 models will be disabled.\n"); + Cvar_Set("gl_md5_load", "0"); + } + } +#endif + + Com_DPrintf("GPU lerp %s\n", gl_static.use_gpu_lerp ? + (gl_config.caps & QGL_CAP_SHADER_STORAGE) ? "enabled (shader storage)" : + (gl_config.caps & QGL_CAP_BUFFER_TEXTURE) ? "enabled (buffer texture)" : + "enabled" : "disabled"); + Cmd_AddCommand("modellist", MOD_List_f); } diff --git a/src/refresh/qgl.c b/src/refresh/qgl.c index 590ecf370..42ba5acf1 100644 --- a/src/refresh/qgl.c +++ b/src/refresh/qgl.c @@ -291,10 +291,24 @@ static const glsection_t sections[] = { .caps = QGL_CAP_TEXTURE_MAX_LEVEL | QGL_CAP_TEXTURE_NON_POWER_OF_TWO, .functions = (const glfunction_t []) { QGL_FN(BindBufferBase), + QGL_FN(BindBufferRange), QGL_FN(BindVertexArray), QGL_FN(DeleteVertexArrays), QGL_FN(GenVertexArrays), QGL_FN(GetStringi), + QGL_FN(VertexAttribIPointer), + { NULL } + } + }, + + // GL 3.1 + // ES 3.2 + { + .ver_gl = QGL_VER(3, 1), + .ver_es = QGL_VER(3, 2), + .caps = QGL_CAP_BUFFER_TEXTURE, + .functions = (const glfunction_t []) { + QGL_FN(TexBuffer), { NULL } } }, @@ -339,6 +353,14 @@ static const glsection_t sections[] = { } }, + // GL 4.3 + // ES 3.1 + { + .ver_gl = QGL_VER(4, 3), + .ver_es = QGL_VER(3, 1), + .caps = QGL_CAP_SHADER_STORAGE, + }, + // GL 4.4 { .ver_gl = QGL_VER(4, 4), diff --git a/src/refresh/qgl.h b/src/refresh/qgl.h index d7bfb8325..b66e11547 100644 --- a/src/refresh/qgl.h +++ b/src/refresh/qgl.h @@ -139,6 +139,7 @@ QGLAPI void (APIENTRYP qglVertexAttribPointer)(GLuint index, GLint size, GLenum // GL 3.0 QGLAPI void (APIENTRYP qglBindBufferBase)(GLenum target, GLuint index, GLuint buffer); +QGLAPI void (APIENTRYP qglBindBufferRange)(GLenum target, GLuint index, GLuint 
buffer, GLintptr offset, GLsizeiptr size); QGLAPI void (APIENTRYP qglBindFramebuffer)(GLenum target, GLuint framebuffer); QGLAPI void (APIENTRYP qglBindRenderbuffer)(GLenum target, GLuint renderbuffer); QGLAPI void (APIENTRYP qglBindVertexArray)(GLuint array); @@ -155,10 +156,12 @@ QGLAPI void (APIENTRYP qglGenerateMipmap)(GLenum target); QGLAPI void (APIENTRYP qglGetFramebufferAttachmentParameteriv)(GLenum target, GLenum attachment, GLenum pname, GLint *params); QGLAPI const GLubyte *(APIENTRYP qglGetStringi)(GLenum name, GLuint index); QGLAPI void (APIENTRYP qglRenderbufferStorage)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +QGLAPI void (APIENTRYP qglVertexAttribIPointer)(GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); // GL 3.1 QGLAPI void (APIENTRYP qglGetActiveUniformBlockiv)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); QGLAPI GLuint (APIENTRYP qglGetUniformBlockIndex)(GLuint program, const GLchar *uniformBlockName); +QGLAPI void (APIENTRYP qglTexBuffer)(GLenum target, GLenum internalformat, GLuint buffer); QGLAPI void (APIENTRYP qglUniformBlockBinding)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); // GL 4.1 diff --git a/src/refresh/shader.c b/src/refresh/shader.c index a931d2f76..302bbbac2 100644 --- a/src/refresh/shader.c +++ b/src/refresh/shader.c @@ -24,8 +24,16 @@ with this program; if not, write to the Free Software Foundation, Inc., #define GLSL(x) SZ_Write(buf, CONST_STR_LEN(#x "\n")); #define GLSF(x) SZ_Write(buf, CONST_STR_LEN(x)) -static void write_header(sizebuf_t *buf) +static void write_header(sizebuf_t *buf, glStateBits_t bits) { +#if USE_MD5 + if (bits & GLS_MESH_MD5 && gl_config.caps & QGL_CAP_SHADER_STORAGE) { + if (gl_config.ver_es) + GLSF("#version 310 es\n"); + else + GLSF("#version 430\n"); + } else +#endif if (gl_config.ver_es) { GLSF("#version 300 es\n"); } else if (gl_config.ver_sl >= QGL_VER(1, 40)) { @@ -36,18 +44,39 @@ static 
void write_header(sizebuf_t *buf) } } -static void write_block(sizebuf_t *buf) +static void write_block(sizebuf_t *buf, glStateBits_t bits) { GLSF("layout(std140) uniform u_block {\n"); + GLSL(mat4 m_vp;); + + if (bits & GLS_MESH_ANY) { + GLSL( + vec3 u_old_scale; + vec3 u_new_scale; + vec3 u_translate; + vec3 u_shadedir; + vec4 u_color; + vec4 pad_0; + float pad_1; + float pad_2; + float pad_3; + uint u_weight_ofs; + uint u_jointnum_ofs; + float u_shellscale; + float u_backlerp; + float u_frontlerp; + ) + } else { + GLSL(mat4 m_sky[2];) + } + GLSL( - mat4 m_vp; - mat4 m_sky[2]; float u_time; float u_modulate; float u_add; float u_intensity; float u_intensity2; - float pad_1; + float pad_4; vec2 w_amp; vec2 w_phase; vec2 u_scroll; @@ -55,10 +84,201 @@ static void write_block(sizebuf_t *buf) GLSF("};\n"); } +static void write_shadedot(sizebuf_t *buf) +{ + GLSL( + float shadedot(vec3 normal) { + float d = dot(normal, u_shadedir); + if (d < 0.0) + d *= 0.3; + return d + 1.0; + } + ) +} + +#if USE_MD5 +static void write_skel_shader(sizebuf_t *buf, glStateBits_t bits) +{ + GLSL( + struct Joint { + vec4 pos; + mat3x3 axis; + }; + layout(std140) uniform Skeleton { + Joint u_joints[256]; + }; + ) + + if (gl_config.caps & QGL_CAP_SHADER_STORAGE) { + GLSL( + layout(std430, binding = 0) readonly buffer Weights { + vec4 b_weights[]; + }; + + layout(std430, binding = 1) readonly buffer JointNums { + uint b_jointnums[]; + }; + ) + } else { + GLSL( + uniform samplerBuffer u_weights; + uniform usamplerBuffer u_jointnums; + ) + } + + GLSL( + in vec2 a_tc; + in vec3 a_norm; + in uvec2 a_vert; + + out vec2 v_tc; + out vec4 v_color; + ) + + if (bits & GLS_MESH_SHADE) + write_shadedot(buf); + + GLSF("void main() {\n"); + GLSL( + vec3 out_pos = vec3(0.0); + vec3 out_norm = vec3(0.0); + + uint start = a_vert[0]; + uint count = a_vert[1]; + ) + + GLSF("for (uint i = start; i < start + count; i++) {\n"); + if (gl_config.caps & QGL_CAP_SHADER_STORAGE) { + GLSL( + uint jointnum = 
b_jointnums[i / 4U]; + jointnum >>= (i & 3U) * 8U; + jointnum &= 255U; + + vec4 weight = b_weights[i]; + ) + } else { + GLSL( + uint jointnum = texelFetch(u_jointnums, int(u_jointnum_ofs + i)).r; + vec4 weight = texelFetch(u_weights, int(u_weight_ofs + i)); + ) + } + GLSL( + Joint joint = u_joints[jointnum]; + + vec3 wv = joint.pos.xyz + (weight.xyz * joint.axis) * joint.pos.w; + out_pos += wv * weight.w; + + out_norm += a_norm * joint.axis * weight.w; + ) + GLSF("}\n"); + + GLSL(v_tc = a_tc;) + + if (bits & GLS_MESH_SHADE) + GLSL(v_color = vec4(u_color.rgb * shadedot(out_norm), u_color.a);) + else + GLSL(v_color = u_color;) + + if (bits & GLS_MESH_SHELL) + GLSL(out_pos += out_norm * u_shellscale;) + + GLSL(gl_Position = m_vp * vec4(out_pos, 1.0);) + GLSF("}\n"); +} +#endif + +static void write_getnormal(sizebuf_t *buf) +{ + GLSL( + vec3 get_normal(int norm) { + const float pi = 3.14159265358979323846; + const float scale = pi * (2.0 / 255.0); + float lat = float( uint(norm) & 255U) * scale; + float lng = float((uint(norm) >> 8) & 255U) * scale; + return vec3( + sin(lat) * cos(lng), + sin(lat) * sin(lng), + cos(lat) + ); + } + ) +} + +static void write_mesh_shader(sizebuf_t *buf, glStateBits_t bits) +{ + GLSL( + in vec2 a_tc; + in ivec4 a_new_pos; + ) + + if (bits & GLS_MESH_LERP) + GLSL(in ivec4 a_old_pos;) + + GLSL( + out vec2 v_tc; + out vec4 v_color; + ) + + if (bits & (GLS_MESH_SHELL | GLS_MESH_SHADE)) + write_getnormal(buf); + + if (bits & GLS_MESH_SHADE) + write_shadedot(buf); + + GLSF("void main() {\n"); + GLSL(v_tc = a_tc;) + + if (bits & GLS_MESH_LERP) { + if (bits & (GLS_MESH_SHELL | GLS_MESH_SHADE)) + GLSL( + vec3 old_norm = get_normal(a_old_pos.w); + vec3 new_norm = get_normal(a_new_pos.w); + ) + + GLSL(vec3 pos = vec3(a_old_pos.xyz) * u_old_scale + vec3(a_new_pos.xyz) * u_new_scale + u_translate;) + + if (bits & GLS_MESH_SHELL) + GLSL(pos += normalize((old_norm * u_backlerp + new_norm * u_frontlerp)) * u_shellscale;) + + if (bits & GLS_MESH_SHADE) + 
GLSL(v_color = vec4(u_color.rgb * (shadedot(old_norm) * u_backlerp + shadedot(new_norm) * u_frontlerp), u_color.a);) + else + GLSL(v_color = u_color;) + } else { + if (bits & (GLS_MESH_SHELL | GLS_MESH_SHADE)) + GLSL(vec3 norm = get_normal(a_new_pos.w);) + + GLSL(vec3 pos = vec3(a_new_pos.xyz) * u_new_scale + u_translate;) + + if (bits & GLS_MESH_SHELL) + GLSL(pos += norm * u_shellscale;) + + if (bits & GLS_MESH_SHADE) + GLSL(v_color = vec4(u_color.rgb * shadedot(norm), u_color.a);) + else + GLSL(v_color = u_color;) + } + + GLSL(gl_Position = m_vp * vec4(pos, 1.0);) + GLSF("}\n"); +} + static void write_vertex_shader(sizebuf_t *buf, glStateBits_t bits) { - write_header(buf); - write_block(buf); + write_header(buf, bits); + write_block(buf, bits); + +#if USE_MD5 + if (bits & GLS_MESH_MD5) { + write_skel_shader(buf, bits); + return; + } +#endif + + if (bits & GLS_MESH_MD2) { + write_mesh_shader(buf, bits); + return; + } GLSL(in vec4 a_pos;) if (bits & GLS_SKY_MASK) { @@ -101,13 +321,13 @@ static void write_vertex_shader(sizebuf_t *buf, glStateBits_t bits) static void write_fragment_shader(sizebuf_t *buf, glStateBits_t bits) { - write_header(buf); + write_header(buf, bits); if (gl_config.ver_es) GLSL(precision mediump float;) if (bits & (GLS_WARP_ENABLE | GLS_LIGHTMAP_ENABLE | GLS_INTENSITY_ENABLE | GLS_SKY_MASK)) - write_block(buf); + write_block(buf, bits); if (bits & GLS_CLASSIC_SKY) { GLSL( @@ -259,13 +479,27 @@ static GLuint create_and_use_program(glStateBits_t bits) qglAttachShader(program, shader_v); qglAttachShader(program, shader_f); - qglBindAttribLocation(program, VERT_ATTR_POS, "a_pos"); - if (!(bits & GLS_SKY_MASK)) - qglBindAttribLocation(program, VERT_ATTR_TC, "a_tc"); - if (bits & GLS_LIGHTMAP_ENABLE) - qglBindAttribLocation(program, VERT_ATTR_LMTC, "a_lmtc"); - if (!(bits & GLS_TEXTURE_REPLACE)) - qglBindAttribLocation(program, VERT_ATTR_COLOR, "a_color"); +#if USE_MD5 + if (bits & GLS_MESH_MD5) { + qglBindAttribLocation(program, VERT_ATTR_MESH_TC, 
"a_tc"); + qglBindAttribLocation(program, VERT_ATTR_MESH_NORM, "a_norm"); + qglBindAttribLocation(program, VERT_ATTR_MESH_VERT, "a_vert"); + } else +#endif + if (bits & GLS_MESH_MD2) { + qglBindAttribLocation(program, VERT_ATTR_MESH_TC, "a_tc"); + if (bits & GLS_MESH_LERP) + qglBindAttribLocation(program, VERT_ATTR_MESH_OLD_POS, "a_old_pos"); + qglBindAttribLocation(program, VERT_ATTR_MESH_NEW_POS, "a_new_pos"); + } else { + qglBindAttribLocation(program, VERT_ATTR_POS, "a_pos"); + if (!(bits & GLS_SKY_MASK)) + qglBindAttribLocation(program, VERT_ATTR_TC, "a_tc"); + if (bits & GLS_LIGHTMAP_ENABLE) + qglBindAttribLocation(program, VERT_ATTR_LMTC, "a_lmtc"); + if (!(bits & GLS_TEXTURE_REPLACE)) + qglBindAttribLocation(program, VERT_ATTR_COLOR, "a_color"); + } qglLinkProgram(program); @@ -300,10 +534,27 @@ static GLuint create_and_use_program(glStateBits_t bits) return program; } - qglUniformBlockBinding(program, index, 0); + qglUniformBlockBinding(program, index, UBO_UNIFORMS); + +#if USE_MD5 + if (bits & GLS_MESH_MD5) { + index = qglGetUniformBlockIndex(program, "Skeleton"); + if (index == GL_INVALID_INDEX) { + Com_EPrintf("Skeleton block not found\n"); + return program; + } + qglUniformBlockBinding(program, index, UBO_SKELETON); + } +#endif qglUseProgram(program); +#if USE_MD5 + if (bits & GLS_MESH_MD5 && !(gl_config.caps & QGL_CAP_SHADER_STORAGE)) { + qglUniform1i(qglGetUniformLocation(program, "u_weights"), TMU_SKEL_WEIGHTS); + qglUniform1i(qglGetUniformLocation(program, "u_jointnums"), TMU_SKEL_JOINTNUMS); + } +#endif if (bits & GLS_CLASSIC_SKY) { qglUniform1i(qglGetUniformLocation(program, "u_texture1"), TMU_TEXTURE); qglUniform1i(qglGetUniformLocation(program, "u_texture2"), TMU_LIGHTMAP); @@ -385,6 +636,7 @@ static void shader_color(GLfloat r, GLfloat g, GLfloat b, GLfloat a) static void shader_load_uniforms(void) { + GL_BindBuffer(GL_UNIFORM_BUFFER, gl_static.uniform_buffer); qglBufferData(GL_UNIFORM_BUFFER, sizeof(gls.u_block), &gls.u_block, 
GL_DYNAMIC_DRAW); c.uniformUploads++; } @@ -466,10 +718,21 @@ static void shader_init(void) gl_static.programs = HashMap_TagCreate(glStateBits_t, GLuint, HashInt32, NULL, TAG_RENDERER); qglGenBuffers(1, &gl_static.uniform_buffer); - qglBindBuffer(GL_UNIFORM_BUFFER, gl_static.uniform_buffer); - qglBindBufferBase(GL_UNIFORM_BUFFER, 0, gl_static.uniform_buffer); + GL_BindBuffer(GL_UNIFORM_BUFFER, gl_static.uniform_buffer); + qglBindBufferBase(GL_UNIFORM_BUFFER, UBO_UNIFORMS, gl_static.uniform_buffer); qglBufferData(GL_UNIFORM_BUFFER, sizeof(gls.u_block), NULL, GL_DYNAMIC_DRAW); +#if USE_MD5 + if (gl_config.caps & QGL_CAP_SKELETON_MASK) { + qglGenBuffers(1, &gl_static.skeleton_buffer); + GL_BindBuffer(GL_UNIFORM_BUFFER, gl_static.skeleton_buffer); + qglBindBufferBase(GL_UNIFORM_BUFFER, UBO_SKELETON, gl_static.skeleton_buffer); + + if ((gl_config.caps & QGL_CAP_SKELETON_MASK) == QGL_CAP_BUFFER_TEXTURE) + qglGenTextures(2, gl_static.skeleton_tex); + } +#endif + if (gl_config.ver_gl >= QGL_VER(3, 2)) qglEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } @@ -494,6 +757,17 @@ static void shader_shutdown(void) gl_static.uniform_buffer = 0; } +#if USE_MD5 + if (gl_static.skeleton_buffer) { + qglDeleteBuffers(1, &gl_static.skeleton_buffer); + gl_static.skeleton_buffer = 0; + } + if (gl_static.skeleton_tex[0] || gl_static.skeleton_tex[1]) { + qglDeleteTextures(2, gl_static.skeleton_tex); + gl_static.skeleton_tex[0] = gl_static.skeleton_tex[1] = 0; + } +#endif + if (gl_config.ver_gl >= QGL_VER(3, 2)) qglDisable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } diff --git a/src/refresh/state.c b/src/refresh/state.c index 17d08f5b2..2e2cc93dd 100644 --- a/src/refresh/state.c +++ b/src/refresh/state.c @@ -321,8 +321,9 @@ void GL_Setup3D(bool waterwarp) void GL_DrawOutlines(GLsizei count, const glIndex_t *indices, bool indexed) { GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); - GL_StateBits(GLS_DEPTHMASK_FALSE | GLS_TEXTURE_REPLACE); - GL_ArrayBits(GLA_VERTEX); + GL_StateBits(GLS_DEPTHMASK_FALSE | 
GLS_TEXTURE_REPLACE | (gls.state_bits & GLS_MESH_MASK)); + if (gls.currentva) + GL_ArrayBits(GLA_VERTEX); GL_DepthRange(0, 0); if (qglPolygonMode) { diff --git a/src/refresh/tess.c b/src/refresh/tess.c index 9124ac85d..b01bd9af0 100644 --- a/src/refresh/tess.c +++ b/src/refresh/tess.c @@ -604,7 +604,9 @@ void GL_DrawIndexed(showtris_t showtris) GL_LockArrays(tess.numverts); - if (!(gl_config.caps & QGL_CAP_CLIENT_VA)) { + if (gl_config.caps & QGL_CAP_CLIENT_VA) { + GL_BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } else { GL_BindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl_static.index_buffer); qglBufferData(GL_ELEMENT_ARRAY_BUFFER, tess.numindices * sizeof(indices[0]), indices, GL_STREAM_DRAW); indices = NULL; From 4bbaafac17c569d91fa0f1e0df17750fb3c4135b Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Mon, 7 Oct 2024 04:09:04 +0300 Subject: [PATCH 11/19] Always use 16-bit indices for meshes. --- src/refresh/gl.h | 9 +++------ src/refresh/mesh.c | 16 ++++++++-------- src/refresh/models.c | 2 +- src/refresh/state.c | 34 ++++++++++++++++++++++------------ src/refresh/tess.c | 6 +++--- 5 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/refresh/gl.h b/src/refresh/gl.h index 41fca4841..c911eb2ac 100644 --- a/src/refresh/gl.h +++ b/src/refresh/gl.h @@ -321,7 +321,7 @@ typedef struct { int numtris; int numindices; int numskins; - glIndex_t *indices; + uint16_t *indices; maliasvert_t *verts; maliastc_t *tcoords; #if USE_MD5 @@ -377,7 +377,7 @@ typedef struct { md5_vertex_t *vertices; maliastc_t *tcoords; - glIndex_t *indices; + uint16_t *indices; md5_weight_t *weights; uint8_t *jointnums; } md5_mesh_t; @@ -762,9 +762,6 @@ static inline void GL_DepthRange(GLfloat n, GLfloat f) #define GL_Color(r, g, b, a) gl_backend->color(r, g, b, a) -#define GL_DrawTriangles(num_indices, indices) \ - qglDrawElements(GL_TRIANGLES, num_indices, QGL_INDEX_TYPE, indices) - typedef enum { SHOWTRIS_NONE = 0, SHOWTRIS_WORLD = BIT(0), @@ -780,7 +777,7 @@ void GL_BindCubemap(GLuint 
texnum); void GL_DeleteBuffer(GLuint buffer); void GL_CommonStateBits(glStateBits_t bits); void GL_ScrollPos(vec2_t scroll, glStateBits_t bits); -void GL_DrawOutlines(GLsizei count, const glIndex_t *indices, bool indexed); +void GL_DrawOutlines(GLsizei count, GLenum type, const void *indices); void GL_Ortho(GLfloat xmin, GLfloat xmax, GLfloat ymin, GLfloat ymax, GLfloat znear, GLfloat zfar); void GL_Frustum(GLfloat fov_x, GLfloat fov_y, GLfloat reflect_x); void GL_Setup2D(void); diff --git a/src/refresh/mesh.c b/src/refresh/mesh.c index 40c52bcd9..ee60422fc 100644 --- a/src/refresh/mesh.c +++ b/src/refresh/mesh.c @@ -433,7 +433,7 @@ static void uniform_mesh_color(float r, float g, float b, float a) } } -static void draw_celshading(const glIndex_t *indices, int num_indices) +static void draw_celshading(const uint16_t *indices, int num_indices) { if (celscale < 0.01f) return; @@ -449,7 +449,7 @@ static void draw_celshading(const glIndex_t *indices, int num_indices) qglLineWidth(gl_celshading->value * celscale); qglPolygonMode(GL_FRONT_AND_BACK, GL_LINE); qglCullFace(GL_FRONT); - GL_DrawTriangles(num_indices, indices); + qglDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, indices); qglCullFace(GL_BACK); qglPolygonMode(GL_FRONT_AND_BACK, GL_FILL); qglLineWidth(1); @@ -545,7 +545,7 @@ static void setup_shadow(void) GL_MultMatrix(shadowmatrix, tmp, matrix); } -static void draw_shadow(const glIndex_t *indices, int num_indices) +static void draw_shadow(const uint16_t *indices, int num_indices) { if (!drawshadow) return; @@ -570,7 +570,7 @@ static void draw_shadow(const glIndex_t *indices, int num_indices) qglEnable(GL_POLYGON_OFFSET_FILL); qglPolygonOffset(-1.0f, -2.0f); - GL_DrawTriangles(num_indices, indices); + qglDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, indices); qglDisable(GL_POLYGON_OFFSET_FILL); // once we have drawn something to stencil buffer, continue to clear it for @@ -623,7 +623,7 @@ static void bind_alias_arrays(const maliasmesh_t 
*mesh) } } -static void draw_alias_mesh(const glIndex_t *indices, int num_indices, +static void draw_alias_mesh(const uint16_t *indices, int num_indices, const maliastc_t *tcoords, int num_verts, image_t **skins, int num_skins) { @@ -657,7 +657,7 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, qglColorMask(0, 0, 0, 0); GL_LockArrays(num_verts); - GL_DrawTriangles(num_indices, indices); + qglDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, indices); GL_UnlockArrays(); qglColorMask(1, 1, 1, 1); @@ -693,13 +693,13 @@ static void draw_alias_mesh(const glIndex_t *indices, int num_indices, GL_LockArrays(num_verts); - GL_DrawTriangles(num_indices, indices); + qglDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, indices); c.trisDrawn += num_indices / 3; draw_celshading(indices, num_indices); if (gl_showtris->integer & SHOWTRIS_MESH) - GL_DrawOutlines(num_indices, indices, true); + GL_DrawOutlines(num_indices, GL_UNSIGNED_SHORT, indices); // FIXME: unlock arrays before changing matrix? 
draw_shadow(indices, num_indices); diff --git a/src/refresh/models.c b/src/refresh/models.c index bc812a8a4..2b5b754e8 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -546,7 +546,7 @@ static int MOD_LoadMD3Mesh(model_t *model, maliasmesh_t *mesh, uint32_t *src_idx; maliasvert_t *dst_vert; maliastc_t *dst_tc; - glIndex_t *dst_idx; + uint16_t *dst_idx; uint32_t index; int i, j, k; const char *err; diff --git a/src/refresh/state.c b/src/refresh/state.c index 2e2cc93dd..f9f802756 100644 --- a/src/refresh/state.c +++ b/src/refresh/state.c @@ -318,7 +318,7 @@ void GL_Setup3D(bool waterwarp) qglClear(GL_DEPTH_BUFFER_BIT | gl_static.stencil_buffer_bit); } -void GL_DrawOutlines(GLsizei count, const glIndex_t *indices, bool indexed) +void GL_DrawOutlines(GLsizei count, GLenum type, const void *indices) { GL_BindTexture(TMU_TEXTURE, TEXNUM_WHITE); GL_StateBits(GLS_DEPTHMASK_FALSE | GLS_TEXTURE_REPLACE | (gls.state_bits & GLS_MESH_MASK)); @@ -329,22 +329,32 @@ void GL_DrawOutlines(GLsizei count, const glIndex_t *indices, bool indexed) if (qglPolygonMode) { qglPolygonMode(GL_FRONT_AND_BACK, GL_LINE); - if (indexed) - GL_DrawTriangles(count, indices); + if (type) + qglDrawElements(GL_TRIANGLES, count, type, indices); else qglDrawArrays(GL_TRIANGLES, 0, count); qglPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - } else { - GLsizei i; - - if (indexed) { - for (i = 0; i < count / 3; i++) - qglDrawElements(GL_LINE_LOOP, 3, QGL_INDEX_TYPE, &indices[i * 3]); - } else { - for (i = 0; i < count / 3; i++) - qglDrawArrays(GL_LINE_LOOP, i * 3, 3); + } else if (type) { + uintptr_t base = (uintptr_t)indices; + uintptr_t size = 0; + + switch (type) { + case GL_UNSIGNED_INT: + size = 4 * 3; + break; + case GL_UNSIGNED_SHORT: + size = 2 * 3; + break; + default: + Q_assert(!"bad type"); } + + for (int i = 0; i < count / 3; i++, base += size) + qglDrawElements(GL_LINE_LOOP, 3, type, VBO_OFS(base)); + } else { + for (int i = 0; i < count / 3; i++) + qglDrawArrays(GL_LINE_LOOP, i * 3, 3); 
} GL_DepthRange(0, 1); diff --git a/src/refresh/tess.c b/src/refresh/tess.c index b01bd9af0..1a8eee154 100644 --- a/src/refresh/tess.c +++ b/src/refresh/tess.c @@ -127,7 +127,7 @@ void GL_DrawParticles(void) qglDrawArrays(GL_TRIANGLES, 0, numverts); if (gl_showtris->integer & SHOWTRIS_FX) - GL_DrawOutlines(numverts, NULL, false); + GL_DrawOutlines(numverts, 0, NULL); GL_UnlockArrays(); } while (total); @@ -612,11 +612,11 @@ void GL_DrawIndexed(showtris_t showtris) indices = NULL; } - GL_DrawTriangles(tess.numindices, indices); + qglDrawElements(GL_TRIANGLES, tess.numindices, QGL_INDEX_TYPE, indices); c.trisDrawn += tess.numindices / 3; if (gl_showtris->integer & showtris) - GL_DrawOutlines(tess.numindices, indices, true); + GL_DrawOutlines(tess.numindices, QGL_INDEX_TYPE, indices); GL_UnlockArrays(); } From e85d1d86798fead7cfcbd0eb9cd3d664633512aa Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Tue, 8 Oct 2024 02:40:12 +0300 Subject: [PATCH 12/19] Update bug report template. --- .github/ISSUE_TEMPLATE/1_bug_report.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_bug_report.md b/.github/ISSUE_TEMPLATE/1_bug_report.md index 0090d1900..97014e003 100644 --- a/.github/ISSUE_TEMPLATE/1_bug_report.md +++ b/.github/ISSUE_TEMPLATE/1_bug_report.md @@ -11,8 +11,7 @@ assignees: '' Make sure the bug is reproducible with latest Q2PRO version. If you compile Q2PRO yourself, update to the latest version from git master. If you are using -prebuilt Windows binaries, update to the latest version available from -https://skuller.net/q2pro/nightly/ +prebuilt Windows binaries, update to the latest nightly build. ### Important information @@ -51,3 +50,8 @@ Provide a link to the log file created by launching `q2pro +set developer 1 If Q2PRO crashes, provide a crash report (Windows) or a backtrace (Linux). On Linux, backtrace can be created by launching Q2PRO with `gdb q2pro --args [...]` and typing `bt` after the crash. 
+ +### Compilation issues + +If reporting a building / compilation issue, provide `meson setup` command +line and full console output. From 7fdb039132aaf2d78ff0bb6b21842ab7bf5c1ac6 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Tue, 8 Oct 2024 17:06:56 +0300 Subject: [PATCH 13/19] Update INSTALL.md. Mention that pkg-config search path must be updated in cross files. Closes #360. --- INSTALL.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7c9f05291..09d678d64 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -124,11 +124,14 @@ with SIMD support: Meson needs correct cross build definition file for compilation. Example cross-files can be found in `.ci` subdirectory (available in git -repository, but not source tarball). +repository, but not source tarball). Note that these cross-files are specific +to CI scripts and shouldn't be used directly (you'll need, at least, to +customize default `pkg-config` search path). Refer to Meson documentation for +more info. Setup build directory: - meson setup --cross-file .ci/x86_64-w64-mingw32.txt -Dwrap_mode=forcefallback builddir + meson setup --cross-file x86_64-w64-mingw32.txt -Dwrap_mode=forcefallback builddir Build: From 46b1bfc9fe9a00a97bbc910588db28b79c873412 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Tue, 8 Oct 2024 17:09:56 +0300 Subject: [PATCH 14/19] Optimize skipping MD5 bounds. 
--- src/refresh/models.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/refresh/models.c b/src/refresh/models.c index 2b5b754e8..df05fecbd 100644 --- a/src/refresh/models.c +++ b/src/refresh/models.c @@ -1263,10 +1263,8 @@ static bool MD5_ParseAnim(model_t *model, const char *s, const char *path) MD5_ParseExpect(&s, "bounds"); MD5_ParseExpect(&s, "{"); - for (i = 0; i < mdl->num_frames * 2; i++) { - vec3_t dummy; - MD5_ParseVector(&s, dummy); - } + for (i = 0; i < mdl->num_frames * 2 * 5; i++) + COM_SkipToken(&s); MD5_ParseExpect(&s, "}"); From 25a54a6819223a4bf95b8f0f4acdc574f16d65ce Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Tue, 8 Oct 2024 21:01:46 +0300 Subject: [PATCH 15/19] Update FreeBSD VM to 14.1. --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3bf9a810f..38a0af4fe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -139,11 +139,11 @@ jobs: - uses: actions/checkout@v4 - name: Build - uses: cross-platform-actions/action@v0.24.0 + uses: cross-platform-actions/action@v0.25.0 with: operating_system: freebsd architecture: x86-64 - version: '14.0' + version: '14.1' run: | sudo pkg update sudo pkg install -y git meson pkgconf openal-soft \ From 13c2c9b60bb276584ca934a9f9def522501b3c36 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Fri, 11 Oct 2024 15:52:34 +0300 Subject: [PATCH 16/19] Fix rendering error if classic sky texture is missing. Don't attempt to use cubemap as classic sky. Regression since 47656f88. 
--- inc/refresh/refresh.h | 2 +- src/refresh/surf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/refresh/refresh.h b/inc/refresh/refresh.h index d3196ea4b..9302b4495 100644 --- a/inc/refresh/refresh.h +++ b/inc/refresh/refresh.h @@ -156,12 +156,12 @@ typedef enum { IF_OPAQUE = BIT(8), // known to be opaque IF_DEFAULT_FLARE = BIT(9), // default flare hack IF_CUBEMAP = BIT(10), // cubemap (or part of it) + IF_CLASSIC_SKY = BIT(11), // split in two halves // these flags only affect R_RegisterImage() behavior, // and are not stored in image IF_OPTIONAL = BIT(16), // don't warn if not found IF_KEEP_EXTENSION = BIT(17), // don't override extension - IF_CLASSIC_SKY = BIT(18), // split in two halves } imageflags_t; typedef enum { diff --git a/src/refresh/surf.c b/src/refresh/surf.c index bf832f990..648b5a733 100644 --- a/src/refresh/surf.c +++ b/src/refresh/surf.c @@ -574,7 +574,7 @@ static glStateBits_t statebits_for_surface(const mface_t *surf) glStateBits_t statebits = GLS_DEFAULT; if (surf->drawflags & SURF_SKY) { - if (Q_stricmpn(surf->texinfo->name, CONST_STR_LEN("n64/env/sky")) == 0) + if (surf->texinfo->image->flags & IF_CLASSIC_SKY) return GLS_TEXTURE_REPLACE | GLS_CLASSIC_SKY; else return GLS_TEXTURE_REPLACE | GLS_DEFAULT_SKY; From 61324ffc588fceabdffbe19d1f222b911ad2cf28 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Fri, 11 Oct 2024 15:53:19 +0300 Subject: [PATCH 17/19] Use VectorRotate() macro in more places. 
--- src/common/math.c | 8 +++----- src/refresh/world.c | 8 ++------ 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/common/math.c b/src/common/math.c index 94f071dd2..81e13bcce 100644 --- a/src/common/math.c +++ b/src/common/math.c @@ -390,14 +390,12 @@ void SetupRotationMatrix(vec3_t matrix[3], const vec3_t dir, float degrees) void RotatePointAroundVector(vec3_t out, const vec3_t dir, const vec3_t in, float degrees) { vec3_t matrix[3]; - vec3_t tmp; + vec3_t temp; SetupRotationMatrix(matrix, dir, degrees); - VectorCopy(in, tmp); - out[0] = DotProduct(tmp, matrix[0]); - out[1] = DotProduct(tmp, matrix[1]); - out[2] = DotProduct(tmp, matrix[2]); + VectorCopy(in, temp); + VectorRotate(temp, matrix, out); } #if USE_MD5 diff --git a/src/refresh/world.c b/src/refresh/world.c index 682d39314..77618b830 100644 --- a/src/refresh/world.c +++ b/src/refresh/world.c @@ -275,9 +275,7 @@ static void GL_TransformLights(const mmodel_t *model) for (i = 0, light = glr.fd.dlights; i < glr.fd.num_dlights; i++, light++) { VectorSubtract(light->origin, glr.ent->origin, temp); - light->transformed[0] = DotProduct(temp, glr.entaxis[0]); - light->transformed[1] = DotProduct(temp, glr.entaxis[1]); - light->transformed[2] = DotProduct(temp, glr.entaxis[2]); + VectorRotate(temp, glr.entaxis, light->transformed); GL_MarkLights_r(model->headnode, light, BIT_ULL(i)); } } @@ -425,9 +423,7 @@ void GL_DrawBspModel(mmodel_t *model) } } VectorSubtract(glr.fd.vieworg, ent->origin, temp); - transformed[0] = DotProduct(temp, glr.entaxis[0]); - transformed[1] = DotProduct(temp, glr.entaxis[1]); - transformed[2] = DotProduct(temp, glr.entaxis[2]); + VectorRotate(temp, glr.entaxis, transformed); } else { VectorAdd(model->mins, ent->origin, bounds[0]); VectorAdd(model->maxs, ent->origin, bounds[1]); From 5dcc520f9cdc77f70841a521a29f931aa36e1cee Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Sat, 12 Oct 2024 22:39:45 +0300 Subject: [PATCH 18/19] Use system meson package. 
--- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 38a0af4fe..d77e21be1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,8 +58,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y gcc-mingw-w64 nasm python3-pip ninja-build - sudo python3 -m pip install meson + sudo apt-get install -y gcc-mingw-w64 nasm meson ninja-build - name: Build run: | From c0966a90cf60e53ac56b1540364244935f316b19 Mon Sep 17 00:00:00 2001 From: Andrey Nazarov Date: Sun, 13 Oct 2024 11:04:35 +0300 Subject: [PATCH 19/19] Allow message write buffer to overflow. With extended protocol limits, overflowing MAX_MSGLEN with a single message is possible in MVD and client demo code. Allow msg_write to overflow and add explicit overflow checks to avoid crashing with a fatal error. --- src/client/demo.c | 24 +++++++++++++++--------- src/client/gtv.c | 17 +++++++++++++++++ src/common/common.c | 2 +- src/common/msg.c | 11 ++++------- src/server/game.c | 3 +++ src/server/main.c | 3 +++ src/server/mvd.c | 41 +++++++++++++++++++++++++++++++++-------- src/server/mvd/client.c | 34 ++++++++++++++++++++++------------ src/server/save.c | 6 ++++++ src/server/send.c | 9 +++++++++ 10 files changed, 113 insertions(+), 37 deletions(-) diff --git a/src/client/demo.c b/src/client/demo.c index bfbd1aac2..ac6ea9ca6 100644 --- a/src/client/demo.c +++ b/src/client/demo.c @@ -224,7 +224,9 @@ void CL_EmitDemoFrame(void) // emit and flush frame emit_delta_frame(oldframe, &cl.frame, lastframe, FRAME_CUR); - if (cls.demo.buffer.cursize + msg_write.cursize > cls.demo.buffer.maxsize) { + if (msg_write.overflowed) { + Com_WPrintf("%s: message buffer overflowed\n", __func__); + } else if (cls.demo.buffer.cursize + msg_write.cursize > cls.demo.buffer.maxsize) { Com_DPrintf("Demo frame overflowed (%u + %u > %u)\n", cls.demo.buffer.cursize, msg_write.cursize,
cls.demo.buffer.maxsize); cls.demo.frames_dropped++; @@ -850,16 +852,20 @@ void CL_EmitDemoSnapshot(void) MSG_WriteByte(svc_layout); MSG_WriteString(cl.layout); - snap = Z_Malloc(sizeof(*snap) + msg_write.cursize - 1); - snap->framenum = cls.demo.frames_read; - snap->filepos = pos; - snap->msglen = msg_write.cursize; - memcpy(snap->data, msg_write.data, msg_write.cursize); + if (msg_write.overflowed) { + Com_WPrintf("%s: message buffer overflowed\n", __func__); + } else { + snap = Z_Malloc(sizeof(*snap) + msg_write.cursize - 1); + snap->framenum = cls.demo.frames_read; + snap->filepos = pos; + snap->msglen = msg_write.cursize; + memcpy(snap->data, msg_write.data, msg_write.cursize); - cls.demo.snapshots = Z_Realloc(cls.demo.snapshots, sizeof(cls.demo.snapshots[0]) * Q_ALIGN(cls.demo.numsnapshots + 1, MIN_SNAPSHOTS)); - cls.demo.snapshots[cls.demo.numsnapshots++] = snap; + cls.demo.snapshots = Z_Realloc(cls.demo.snapshots, sizeof(cls.demo.snapshots[0]) * Q_ALIGN(cls.demo.numsnapshots + 1, MIN_SNAPSHOTS)); + cls.demo.snapshots[cls.demo.numsnapshots++] = snap; - Com_DPrintf("[%d] snaplen %u\n", cls.demo.frames_read, msg_write.cursize); + Com_DPrintf("[%d] snaplen %u\n", cls.demo.frames_read, msg_write.cursize); + } SZ_Clear(&msg_write); diff --git a/src/client/gtv.c b/src/client/gtv.c index 1c18f7c39..a281f2319 100644 --- a/src/client/gtv.c +++ b/src/client/gtv.c @@ -29,6 +29,8 @@ static byte gtv_send_buffer[MAX_GTS_MSGLEN*2]; static byte gtv_message_buffer[MAX_MSGLEN]; +static void drop_client(const char *reason); + static void build_gamestate(void) { centity_t *ent; @@ -177,6 +179,13 @@ void CL_GTV_EmitFrame(void) MSG_WriteShort(0); // end of packetentities + // check for overflow + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + drop_client("frame overflowed"); + return; + } + SZ_Write(&cls.gtv.message, msg_write.data, msg_write.cursize); SZ_Clear(&msg_write); } @@ -264,6 +273,14 @@ void CL_GTV_Resume(void) build_gamestate(); emit_gamestate(); + + // check 
for overflow + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + drop_client("gamestate overflowed"); + return; + } + write_message(GTS_STREAM_DATA); SZ_Clear(&msg_write); } diff --git a/src/common/common.c b/src/common/common.c index 4e3d76ca8..b20a4af22 100644 --- a/src/common/common.c +++ b/src/common/common.c @@ -530,7 +530,7 @@ void Com_Error(error_type_t code, const char *fmt, ...) // overlap with one of the arguments! memcpy(com_errorMsg, msg, len + 1); - // fix up drity message buffers + // fix up dirty message buffers MSG_Init(); // abort any console redirects diff --git a/src/common/msg.c b/src/common/msg.c index a0fe813e6..6efdb7b49 100644 --- a/src/common/msg.c +++ b/src/common/msg.c @@ -46,19 +46,16 @@ const usercmd_t nullUserCmd; ============= MSG_Init -Initialize default buffers, clearing allow overflow/underflow flags. - -This is the only place where writing buffer is initialized. Writing buffer is -never allowed to overflow. - -Reading buffer is reinitialized in many other places. Reinitializing will set -the allow underflow flag as appropriate. +Initialize default buffers (also called from Com_Error). +This is the only place where writing buffer is initialized. 
============= */ void MSG_Init(void) { SZ_Init(&msg_read, msg_read_buffer, MAX_MSGLEN, "msg_read"); SZ_Init(&msg_write, msg_write_buffer, MAX_MSGLEN, "msg_write"); + msg_read.allowunderflow = true; + msg_write.allowoverflow = true; } diff --git a/src/server/game.c b/src/server/game.c index 204c19bf8..2f0647837 100644 --- a/src/server/game.c +++ b/src/server/game.c @@ -96,6 +96,9 @@ static void PF_Unicast(edict_t *ent, qboolean reliable) goto clear; } + if (msg_write.overflowed) + Com_Error(ERR_DROP, "%s: message buffer overflowed", __func__); + clientNum = NUM_FOR_EDICT(ent) - 1; if (clientNum < 0 || clientNum >= sv_maxclients->integer) { Com_WPrintf("%s to a non-client %d\n", __func__, clientNum); diff --git a/src/server/main.c b/src/server/main.c index 8d3c8cdd7..e3711c745 100644 --- a/src/server/main.c +++ b/src/server/main.c @@ -1809,6 +1809,9 @@ static void SV_RunGameFrame(void) time_after_game = Sys_Milliseconds(); #endif + if (msg_write.overflowed) + Com_Error(ERR_DROP, "%s: message buffer overflowed", __func__); + if (msg_write.cursize) { Com_WPrintf("Game left %u bytes " "in multicast buffer, cleared.\n", diff --git a/src/server/mvd.c b/src/server/mvd.c index a4eaa8952..9cff1fae7 100644 --- a/src/server/mvd.c +++ b/src/server/mvd.c @@ -831,6 +831,13 @@ static void resume_streams(void) build_gamestate(); emit_gamestate(); + // check for overflow + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + mvd_error("gamestate overflowed"); + return; + } + FOR_EACH_ACTIVE_GTV(client) { // send gamestate write_message(client, GTS_STREAM_DATA); @@ -917,9 +924,6 @@ static bool mvd_enable(void) // don't timeout mvd.clients_active = svs.realtime; - // check for activation - check_players_activity(); - return true; } @@ -1040,7 +1044,7 @@ void SV_MvdEndFrame(void) emit_frame(); // if reliable message and frame update don't fit, kick all clients - if (mvd.message.cursize + msg_write.cursize >= MAX_MSGLEN) { + if (msg_write.overflowed || mvd.message.cursize + 
msg_write.cursize >= MAX_MSGLEN) { SZ_Clear(&msg_write); mvd_error("frame overflowed"); return; @@ -1573,6 +1577,11 @@ static void parse_stream_start(gtv_client_t *client) // send gamestate if active if (mvd.active) { emit_gamestate(); + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + drop_client(client, "gamestate overflowed"); + return; + } write_message(client, GTS_STREAM_DATA); SZ_Clear(&msg_write); } else { @@ -1955,7 +1964,7 @@ static void mvd_drop(gtv_serverop_t op) // something bad happened, remove all clients static void mvd_error(const char *reason) { - Com_EPrintf("Fatal MVD error: %s\n", reason); + Com_EPrintf("Fatal MVD server error: %s\n", reason); // stop recording rec_stop(); @@ -2010,6 +2019,13 @@ void SV_MvdMapChanged(void) build_gamestate(); emit_gamestate(); + // check for overflow + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + mvd_error("gamestate overflowed"); + return; + } + // send gamestate to all MVD clients FOR_EACH_ACTIVE_GTV(client) { write_message(client, GTS_STREAM_DATA); @@ -2246,11 +2262,20 @@ static void rec_start(qhandle_t demofile) magic = MVD_MAGIC; FS_Write(&magic, 4, demofile); - if (mvd.active) { - emit_gamestate(); - rec_write(); + if (!mvd.active) + return; + + emit_gamestate(); + + // check for overflow + if (msg_write.overflowed) { SZ_Clear(&msg_write); + mvd_error("gamestate overflowed"); + return; } + + rec_write(); + SZ_Clear(&msg_write); } /* diff --git a/src/server/mvd/client.c b/src/server/mvd/client.c index e876bd2d5..327a1db3d 100644 --- a/src/server/mvd/client.c +++ b/src/server/mvd/client.c @@ -626,19 +626,23 @@ static void demo_emit_snapshot(mvd_t *mvd) MSG_WriteString(mvd->layout); } - snap = MVD_Malloc(sizeof(*snap) + msg_write.cursize - 1); - snap->framenum = mvd->framenum; - snap->filepos = pos; - snap->msglen = msg_write.cursize; - memcpy(snap->data, msg_write.data, msg_write.cursize); - - if (!mvd->snapshots) - mvd->snapshots = MVD_Malloc(sizeof(mvd->snapshots[0]) * MIN_SNAPSHOTS); - else - 
mvd->snapshots = Z_Realloc(mvd->snapshots, sizeof(mvd->snapshots[0]) * Q_ALIGN(mvd->numsnapshots + 1, MIN_SNAPSHOTS)); - mvd->snapshots[mvd->numsnapshots++] = snap; + if (msg_write.overflowed) { + Com_WPrintf("%s: message buffer overflowed\n", __func__); + } else { + snap = MVD_Malloc(sizeof(*snap) + msg_write.cursize - 1); + snap->framenum = mvd->framenum; + snap->filepos = pos; + snap->msglen = msg_write.cursize; + memcpy(snap->data, msg_write.data, msg_write.cursize); + + if (!mvd->snapshots) + mvd->snapshots = MVD_Malloc(sizeof(mvd->snapshots[0]) * MIN_SNAPSHOTS); + else + mvd->snapshots = Z_Realloc(mvd->snapshots, sizeof(mvd->snapshots[0]) * Q_ALIGN(mvd->numsnapshots + 1, MIN_SNAPSHOTS)); + mvd->snapshots[mvd->numsnapshots++] = snap; - Com_DPrintf("[%d] snaplen %u\n", mvd->framenum, msg_write.cursize); + Com_DPrintf("[%d] snaplen %u\n", mvd->framenum, msg_write.cursize); + } SZ_Clear(&msg_write); @@ -1976,6 +1980,12 @@ void MVD_StreamedRecord_f(void) emit_gamestate(mvd); + // check for overflow + if (msg_write.overflowed) { + ret = Q_ERR(EMSGSIZE); + goto fail; + } + // write magic magic = MVD_MAGIC; ret = FS_Write(&magic, 4, f); diff --git a/src/server/save.c b/src/server/save.c index a6bac3437..b8bcaede1 100644 --- a/src/server/save.c +++ b/src/server/save.c @@ -74,6 +74,12 @@ static int write_server_file(savetype_t autosave) } MSG_WriteString(NULL); + // check for overflow + if (msg_write.overflowed) { + SZ_Clear(&msg_write); + return -1; + } + // write server state ret = FS_WriteFile("save/" SAVE_CURRENT "/server.ssv", msg_write.data, msg_write.cursize); diff --git a/src/server/send.c b/src/server/send.c index 0afb0e639..98e7eee77 100644 --- a/src/server/send.c +++ b/src/server/send.c @@ -271,6 +271,9 @@ void SV_Multicast(const vec3_t origin, multicast_t to) if (to && !origin) Com_Error(ERR_DROP, "%s: NULL origin", __func__); + if (msg_write.overflowed) + Com_Error(ERR_DROP, "%s: message buffer overflowed", __func__); + if (!msg_write.cursize) { 
Com_DPrintf("%s with empty data\n", __func__); return; @@ -389,6 +392,8 @@ void SV_ClientAddMessage(client_t *client, int flags) { int len; + Q_assert(!msg_write.overflowed); + if (!msg_write.cursize) { return; } @@ -755,6 +760,8 @@ static void write_datagram_old(client_t *client) } #endif + Q_assert(!msg_write.overflowed); + // send the datagram cursize = Netchan_Transmit(&client->netchan, msg_write.cursize, @@ -819,6 +826,8 @@ static void write_datagram_new(client_t *client) } #endif + Q_assert(!msg_write.overflowed); + // send the datagram cursize = Netchan_Transmit(&client->netchan, msg_write.cursize,