From bc9c8fab8da44f6f877c1643a3eb0e8431a7580e Mon Sep 17 00:00:00 2001 From: Sheynar Date: Thu, 9 Apr 2026 14:39:54 +0300 Subject: [PATCH 1/7] add x11 nvenc hardware yuv444, split yuv420 and 444 hdr probing --- src/nvenc/nvenc_base.cpp | 2 +- src/nvenc/nvenc_utils.cpp | 3 + src/nvhttp.cpp | 6 +- src/platform/common.h | 2 + src/platform/linux/cuda.cpp | 23 +++++- src/platform/linux/cuda.cu | 71 ++++++++++++++++ src/platform/linux/cuda.h | 4 +- src/video.cpp | 157 +++++++++++++++++++++++++++++------- src/video.h | 2 + 9 files changed, 232 insertions(+), 38 deletions(-) diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index c63dfa902ee..f24da88cc35 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -183,7 +183,7 @@ namespace nvenc { }; auto buffer_is_yuv444 = [&]() { - return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT; + return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT; }; { diff --git a/src/nvenc/nvenc_utils.cpp b/src/nvenc/nvenc_utils.cpp index 2d19bd46299..1d4271a0ff1 100644 --- a/src/nvenc/nvenc_utils.cpp +++ b/src/nvenc/nvenc_utils.cpp @@ -42,6 +42,9 @@ namespace nvenc { case platf::pix_fmt_e::ayuv: return NV_ENC_BUFFER_FORMAT_AYUV; + case platf::pix_fmt_e::yuv444p: + return NV_ENC_BUFFER_FORMAT_YUV444; + case platf::pix_fmt_e::yuv444p16: return NV_ENC_BUFFER_FORMAT_YUV444_10BIT; diff --git a/src/nvhttp.cpp b/src/nvhttp.cpp index 4db8c6e4e94..fb3eb6a534a 100644 --- a/src/nvhttp.cpp +++ b/src/nvhttp.cpp @@ -741,8 +741,10 @@ namespace nvhttp { codec_mode_flags |= SCM_HEVC_REXT8_444; } } - if (video::active_hevc_mode >= 3) { + if (video::active_hevc_mode == 3 || video::active_hevc_mode == 5) { codec_mode_flags |= SCM_HEVC_MAIN10; + } + if (video::active_hevc_mode == 4 || video::active_hevc_mode == 5) { if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { codec_mode_flags |= SCM_HEVC_REXT10_444; } @@ -811,7 +813,7 @@ namespace nvhttp { for (auto &proc : proc::proc.get_apps()) { pt::ptree app; - app.put("IsHdrSupported"s, video::active_hevc_mode == 3 ? 1 : 0); + app.put("IsHdrSupported"s, video::active_hevc_mode >= 3 ? 1 : 0); app.put("AppTitle"s, proc.name); app.put("ID", proc.id); diff --git a/src/platform/common.h b/src/platform/common.h index 9f11b8473e3..91a4e45678b 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -243,6 +243,7 @@ namespace platf { p010, ///< P010 ayuv, ///< AYUV yuv444p16, ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4 + yuv444p, ///< Planar 8-bit YUV 4:4:4 y410, ///< Y410 unknown ///< Unknown }; @@ -259,6 +260,7 @@ namespace platf { _CONVERT(p010); _CONVERT(ayuv); _CONVERT(yuv444p16); + _CONVERT(yuv444p); _CONVERT(y410); _CONVERT(unknown); } diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 9fd5e529b42..cf45ba40d3d 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -120,8 +120,10 @@ namespace cuda { this->frame = frame; auto hwframe_ctx = (AVHWFramesContext *) hw_frames_ctx->data; - if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12) { - BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv; + + if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12 && + hwframe_ctx->sw_format != AV_PIX_FMT_YUV444P) { + BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P"sv; return -1; } @@ -178,7 +180,12 @@ namespace cuda { return; } - sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0}); + //frame->data[2] not null on YUV444 conversion + if (frame->data[2]) { + sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0}); + } else { + sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0}); + } } cudaTextureObject_t tex_obj(const tex_t &tex) const { @@ -200,6 +207,11 @@ namespace cuda { class cuda_ram_t: public cuda_t { public: int convert(platf::img_t &img) override { + + //frame->data[2] not null on YUV444 conversion + if (frame->data[2]) { + return sws.load_ram(img, tex.array) || sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(tex), stream.get()); + } return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get()); } @@ -224,6 +236,11 @@ namespace cuda { class cuda_vram_t: public cuda_t { public: int convert(platf::img_t &img) override { + + //frame->data[2] not null on YUV444 conversion + if (frame->data[2]) { + return sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(((img_t *) &img)->tex), stream.get()); + } return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(((img_t *) &img)->tex), stream.get()); } }; diff --git a/src/platform/linux/cuda.cu b/src/platform/linux/cuda.cu index e48dc3a0dc1..2a0379be75a 100644 --- a/src/platform/linux/cuda.cu +++ b/src/platform/linux/cuda.cu @@ -155,6 +155,18 @@ namespace cuda { return (dot(pixel, make_float3(vec_y)) + vec_y.w) * color_matrix->range_y.x + color_matrix->range_y.y; } + inline __device__ float calcU(float3 pixel, const cuda_color_t *const color_matrix) { + float4 vec_u = color_matrix->color_vec_u; + + return (dot(pixel, make_float3(vec_u)) + vec_u.w) * color_matrix->range_uv.x + color_matrix->range_uv.y; + } + + inline __device__ float calcV(float3 pixel, const cuda_color_t *const color_matrix) { + float4 vec_v = color_matrix->color_vec_v; + + return (dot(pixel, make_float3(vec_v)) + vec_v.w) * color_matrix->range_uv.x + color_matrix->range_uv.y; + } + __global__ void RGBA_to_NV12( cudaTextureObject_t srcImage, std::uint8_t *dstY, @@ -205,6 +217,44 @@ namespace cuda { dstY1[1] = calcY(rgb_rb, color_matrix) * 245.0f; // 245.0f is a magic number to ensure slight changes in luminosity are more visible } + __global__ void RGBA_to_YUV444_packed( + cudaTextureObject_t srcImage, + std::uint8_t *dstY, + std::uint8_t *dstU, + std::uint8_t *dstV, + std::uint32_t dstPitchY, + float scale, + const viewport_t viewport, + const cuda_color_t *const color_matrix + ) { + int idX = threadIdx.x + blockDim.x * blockIdx.x; + int idY = threadIdx.y + blockDim.y * blockIdx.y; + + if (idX >= viewport.width) { + return; + } + if (idY >= viewport.height) { + return; + } + + float x = idX * scale; + float y = idY * scale; + + idX += viewport.offsetX; + idY += viewport.offsetY; + + dstY = dstY + idX + idY * dstPitchY; + dstU = dstU + idX + idY * dstPitchY; + dstV = dstV + idX + idY * dstPitchY; + + float3 rgb = bgra_to_rgb(tex2D(srcImage, x, y)); + + dstY[0] = calcY(rgb, color_matrix) * 255.0f; + dstU[0] = calcU(rgb, color_matrix) * 255.0f; + dstV[0] = calcV(rgb, color_matrix) * 255.0f; + + } + int tex_t::copy(std::uint8_t *src, int height, int pitch) { CU_CHECK(cudaMemcpy2DToArray(array, 0, 0, src, pitch, pitch, height, cudaMemcpyDeviceToDevice), "Couldn't copy to cuda array from deviceptr"); @@ -329,6 +379,27 @@ namespace cuda { return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_NV12 failed"); } + int sws_t::convert_yuv444(std::uint8_t *Y, std::uint8_t *U, std::uint8_t *V, std::uint32_t pitch, cudaTextureObject_t texture, stream_t::pointer stream) { + return convert_yuv444(Y, U, V, pitch, texture, stream, viewport); + } + + int sws_t::convert_yuv444(std::uint8_t *Y, std::uint8_t *U, std::uint8_t *V, std::uint32_t pitch, + cudaTextureObject_t texture, stream_t::pointer stream, + const viewport_t &viewport) { + int threadsX = viewport.width; + int threadsY = viewport.height; + + dim3 block(threadsPerBlock); + dim3 grid(div_align(threadsX, threadsPerBlock), threadsY); + + RGBA_to_YUV444_packed<<>>( + texture, Y, U, V, pitch, scale, viewport, + (cuda_color_t *) color_matrix.get() + ); + + return CU_CHECK_IGNORE(cudaGetLastError(), "RGBA_to_YUV444_planar failed"); + } + void sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) { auto color_p = video::color_vectors_from_colorspace(colorspace, true); CU_CHECK_IGNORE(cudaMemcpy(color_matrix.get(), color_p, sizeof(video::color_t), cudaMemcpyHostToDevice), "Couldn't copy color matrix to cuda"); diff --git a/src/platform/linux/cuda.h b/src/platform/linux/cuda.h index 2353a088af5..cc2337b6b70 100644 --- a/src/platform/linux/cuda.h +++ b/src/platform/linux/cuda.h @@ -112,7 +112,9 @@ namespace cuda { // Converts loaded image into a CUDevicePtr int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream); int convert(std::uint8_t *Y, std::uint8_t *UV, std::uint32_t pitchY, std::uint32_t pitchUV, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport); - + int convert_yuv444(std::uint8_t *Y, std::uint8_t *U, std::uint8_t *V, std::uint32_t pitch, cudaTextureObject_t texture, stream_t::pointer stream); + int convert_yuv444(std::uint8_t *Y, std::uint8_t *U, std::uint8_t *V, std::uint32_t pitch, cudaTextureObject_t texture, stream_t::pointer stream, const viewport_t &viewport); + void apply_colorspace(const video::sunshine_colorspace_t &colorspace); int load_ram(platf::img_t &img, cudaArray_t array); diff --git a/src/video.cpp b/src/video.cpp index 00af66c3a69..56cac35de6a 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -530,8 +530,8 @@ namespace video { #endif AV_PIX_FMT_NV12, AV_PIX_FMT_P010, - AV_PIX_FMT_NONE, - AV_PIX_FMT_NONE, + AV_PIX_FMT_YUV444P, + AV_PIX_FMT_YUV444P16, #ifdef _WIN32 dxgi_init_avcodec_hardware_input_buffer #else @@ -610,7 +610,7 @@ namespace video { {}, // Fallback options "h264_nvenc"s, }, - PARALLEL_ENCODING + PARALLEL_ENCODING | YUV444_SUPPORT }; #endif @@ -1632,14 +1632,25 @@ namespace video { return nullptr; } - if (config.dynamicRange && !video_format[encoder_t::DYNAMIC_RANGE]) { - BOOST_LOG(error) << video_format.name << ": dynamic range not supported"sv; - return nullptr; - } + if (config.chromaSamplingType == 1) { + + if (!video_format[encoder_t::YUV444]) { + BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 not supported"sv; + return nullptr; + } + + if (config.dynamicRange && !video_format[encoder_t::DYNAMIC_RANGE_YUV444]) { + BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 dynamic range not supported"sv; + return nullptr; + } + + } else { + + if (config.dynamicRange && !video_format[encoder_t::DYNAMIC_RANGE]) { + BOOST_LOG(error) << video_format.name << ": dynamic range not supported"sv; + return nullptr; + } - if (config.chromaSamplingType == 1 && !video_format[encoder_t::YUV444]) { - BOOST_LOG(error) << video_format.name << ": YUV 4:4:4 not supported"sv; - return nullptr; } auto codec = avcodec_find_encoder_by_name(video_format.name.c_str()); @@ -2732,16 +2743,17 @@ namespace video { encoder.h264[encoder_t::YUV444] = false; } - const config_t generic_hdr_config = {1920, 1080, 60, 6000, 1000, 1, 0, 3, 1, 1, 0}; + auto test_yuv444 = [&](auto &flag_map, auto video_format) { + const config_t sdr_yuv444_config = {1920, 1080, 60, 6000, 1000, 1, 0, 1, 1, 0, 1}; - // Reset the display since we're switching from SDR to HDR - reset_display(disp, encoder.platform_formats->dev_type, output_name, generic_hdr_config); - if (!disp) { - return false; - } + auto config = sdr_yuv444_config; + + // Reset the display + reset_display(disp, encoder.platform_formats->dev_type, output_name, config); + if (!disp) { + return; + } - auto test_hdr_and_yuv444 = [&](auto &flag_map, auto video_format) { - auto config = generic_hdr_config; config.videoFormat = video_format; if (!flag_map[encoder_t::PASSED]) { @@ -2750,15 +2762,46 @@ namespace video { auto encoder_codec_name = encoder.codec_from_config(config).name; - // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported. + // Test 4:4:4 SDR first config.chromaSamplingType = 1; - if ((encoder.flags & YUV444_SUPPORT) && disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { - flag_map[encoder_t::DYNAMIC_RANGE] = true; + if ((encoder.flags & YUV444_SUPPORT) && + disp->is_codec_supported(encoder_codec_name, config) && + validate_config(disp, encoder, config) >= 0) { flag_map[encoder_t::YUV444] = true; - return; } else { flag_map[encoder_t::YUV444] = false; } + }; + + auto test_hdr = [&](auto &flag_map, auto video_format) { + + const config_t generic_hdr_config = {1920, 1080, 60, 6000, 1000, 1, 0, 3, 1, 1, 0}; + + auto config = generic_hdr_config; + + // Reset the display + reset_display(disp, encoder.platform_formats->dev_type, output_name, config); + if (!disp) { + return; + } + + config.videoFormat = video_format; + + if (!flag_map[encoder_t::PASSED]) { + return; + } + + auto encoder_codec_name = encoder.codec_from_config(config).name; + + // Test 4:4:4 HDR first. + config.chromaSamplingType = 1; + if ((encoder.flags & YUV444_SUPPORT) && + disp->is_codec_supported(encoder_codec_name, config) && + validate_config(disp, encoder, config) >= 0) { + flag_map[encoder_t::DYNAMIC_RANGE_YUV444] = true; + } else { + flag_map[encoder_t::DYNAMIC_RANGE_YUV444] = false; + } // Test 4:2:0 HDR config.chromaSamplingType = 0; @@ -2771,9 +2814,12 @@ namespace video { // HDR is not supported with H.264. Don't bother even trying it. encoder.h264[encoder_t::DYNAMIC_RANGE] = false; + encoder.h264[encoder_t::DYNAMIC_RANGE_YUV444] = false; - test_hdr_and_yuv444(encoder.hevc, 1); - test_hdr_and_yuv444(encoder.av1, 2); + test_yuv444(encoder.hevc, 1); + test_hdr(encoder.hevc, 1); + test_yuv444(encoder.av1, 2); + test_hdr(encoder.av1, 2); } encoder.h264[encoder_t::VUI_PARAMETERS] = encoder.h264[encoder_t::VUI_PARAMETERS] && !config::sunshine.flags[config::flag::FORCE_VIDEO_HEADER_REPLACE]; @@ -2812,7 +2858,13 @@ namespace video { auto adjust_encoder_constraints = [&](encoder_t *encoder) { // If we can't satisfy both the encoder and codec requirement, prefer the encoder over codec support - if (active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) { + if (active_hevc_mode == 5 && !encoder->hevc[encoder_t::DYNAMIC_RANGE] && !encoder->hevc[encoder_t::DYNAMIC_RANGE_YUV444]) { + BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support HEVC Main10 Rext10_444 on this system"sv; + active_hevc_mode = 0; + } else if (active_hevc_mode == 4 && !encoder->hevc[encoder_t::DYNAMIC_RANGE_YUV444]) { + BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support HEVC Rext10_444 on this system"sv; + active_hevc_mode = 0; + } else if (active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) { BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support HEVC Main10 on this system"sv; active_hevc_mode = 0; } else if (active_hevc_mode == 2 && !encoder->hevc[encoder_t::PASSED]) { @@ -2820,9 +2872,15 @@ namespace video { active_hevc_mode = 0; } - if (active_av1_mode == 3 && !encoder->av1[encoder_t::DYNAMIC_RANGE]) { - BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 Main10 on this system"sv; + if (active_av1_mode == 5 && !encoder->av1[encoder_t::DYNAMIC_RANGE] && !encoder->av1[encoder_t::DYNAMIC_RANGE_YUV444]) { + BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 Main10 Rext10_444 on this system"sv; active_av1_mode = 0; + } else if (active_hevc_mode == 4 && !encoder->av1[encoder_t::DYNAMIC_RANGE_YUV444]) { + BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 Rext10_444 on this system"sv; + active_hevc_mode = 0; + } else if (active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) { + BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 Main10 on this system"sv; + active_hevc_mode = 0; } else if (active_av1_mode == 2 && !encoder->av1[encoder_t::PASSED]) { BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 on this system"sv; active_av1_mode = 0; @@ -2870,13 +2928,29 @@ namespace video { } // Skip it if it doesn't support the specified codec at all - if ((active_hevc_mode >= 2 && !encoder->hevc[encoder_t::PASSED]) || (active_av1_mode >= 2 && !encoder->av1[encoder_t::PASSED])) { + if ((active_hevc_mode >= 2 && !encoder->hevc[encoder_t::PASSED]) || + (active_av1_mode >= 2 && !encoder->av1[encoder_t::PASSED])) { + pos++; + continue; + } + + // Skip it if it doesn't support HDR on the specified codec + if ((active_hevc_mode == 5 && !encoder->hevc[encoder_t::DYNAMIC_RANGE] && !encoder->hevc[encoder_t::DYNAMIC_RANGE_YUV444]) || + (active_av1_mode == 5 && !encoder->av1[encoder_t::DYNAMIC_RANGE] && !encoder->av1[encoder_t::DYNAMIC_RANGE_YUV444])) { pos++; continue; } // Skip it if it doesn't support HDR on the specified codec - if ((active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) || (active_av1_mode == 3 && !encoder->av1[encoder_t::DYNAMIC_RANGE])) { + if ((active_hevc_mode == 4 && !encoder->hevc[encoder_t::DYNAMIC_RANGE_YUV444]) || + (active_av1_mode == 4 && !encoder->av1[encoder_t::DYNAMIC_RANGE_YUV444])) { + pos++; + continue; + } + + // Skip it if it doesn't support HDR on the specified codec + if ((active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) || + (active_av1_mode == 3 && !encoder->av1[encoder_t::DYNAMIC_RANGE])) { pos++; continue; } @@ -2967,12 +3041,29 @@ namespace video { BOOST_LOG(info) << "Found AV1 encoder: "sv << encoder.av1.name << " ["sv << encoder.name << ']'; } + // 2 - passed + // 3 - HDR yuv420 + // 4 - HDR yuv444 + // 5 - HDR yuv420 & HDR yuv444 + if (active_hevc_mode == 0) { - active_hevc_mode = encoder.hevc[encoder_t::PASSED] ? (encoder.hevc[encoder_t::DYNAMIC_RANGE] ? 3 : 2) : 1; + active_hevc_mode = 1; + if (encoder.hevc[encoder_t::PASSED]) { + active_hevc_mode = 2; + if (encoder.hevc[encoder_t::DYNAMIC_RANGE]) active_hevc_mode += 1; + if (encoder.hevc[encoder_t::DYNAMIC_RANGE_YUV444]) active_hevc_mode += 2; + } + BOOST_LOG(debug) << "ENCODER STATUS ACTIVE_HEVC_MODE: "sv< Date: Thu, 9 Apr 2026 15:09:21 +0300 Subject: [PATCH 2/7] add wayland - cuda egl yuv444 hardware encoding --- src/platform/linux/cuda.cpp | 106 +++-- src/platform/linux/graphics.cpp | 375 +++++++++++++++++- src/platform/linux/graphics.h | 63 ++- src/platform/linux/vaapi.cpp | 8 +- .../linux/assets/shaders/opengl/ConvertU.frag | 26 ++ .../linux/assets/shaders/opengl/ConvertV.frag | 26 ++ 6 files changed, 553 insertions(+), 51 deletions(-) create mode 100644 src_assets/linux/assets/shaders/opengl/ConvertU.frag create mode 100644 src_assets/linux/assets/shaders/opengl/ConvertV.frag diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index cf45ba40d3d..313969fe047 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -362,18 +362,26 @@ namespace cuda { auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data; sw_format = hw_frames_ctx->sw_format; - auto nv12_opt = egl::create_target(frame->width, frame->height, sw_format); - if (!nv12_opt) { - return -1; - } - auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format); if (!sws_opt) { return -1; } this->sws = std::move(*sws_opt); - this->nv12 = std::move(*nv12_opt); + + if (sw_format == AV_PIX_FMT_YUV444P) { + auto yuv444_opt = egl::create_yuv444_target(frame->width, frame->height, sw_format); + if (!yuv444_opt) { + return -1; + } + this->yuv444 = std::move(*yuv444_opt); + } else { + auto nv12_opt = egl::create_nv12_target(frame->width, frame->height, sw_format); + if (!nv12_opt) { + return -1; + } + this->nv12 = std::move(*nv12_opt); + } auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx->hwctx; @@ -384,9 +392,14 @@ namespace cuda { cuda_ctx->stream = stream.get(); - CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture"); - CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture"); - + if (sw_format == AV_PIX_FMT_YUV444P) { + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res,yuv444->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y texture"); + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&u_res,yuv444->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register U texture"); + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&v_res,yuv444->tex[2], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register V texture"); + } else { + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture"); + CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture"); + } return 0; } @@ -416,32 +429,62 @@ namespace cuda { } // Perform the color conversion and scaling in GL - sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]); - sws.convert(nv12->buf); + if (sw_format == AV_PIX_FMT_YUV444P) { + sws.load_yuv444_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.convert_yuv444(yuv444->buf); + } else { + sws.load_nv12_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.convert_nv12(nv12->buf); + } auto fmt_desc = av_pix_fmt_desc_get(sw_format); + + if (sw_format == AV_PIX_FMT_YUV444P) { + + // Map the GL textures to read for CUDA + CUgraphicsResource resources[3] = {y_res.get(), u_res.get(), v_res.get()}; + CU_CHECK(cdf->cuGraphicsMapResources(3, resources, stream.get()), "Couldn't map GL textures in CUDA"); + + // Copy from the GL textures to the target CUDA frame + for (int i = 0; i < 3; i++) { + CUDA_MEMCPY2D cpy = {}; + cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY; + CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array"); + + cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.dstDevice = (CUdeviceptr) frame->data[i]; + cpy.dstPitch = frame->linesize[i]; + cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step); + cpy.Height = frame->height; + + CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); + } + // Unmap the textures to allow modification from GL again + CU_CHECK(cdf->cuGraphicsUnmapResources(3, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); - // Map the GL textures to read for CUDA - CUgraphicsResource resources[2] = {y_res.get(), uv_res.get()}; - CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA"); - - // Copy from the GL textures to the target CUDA frame - for (int i = 0; i < 2; i++) { - CUDA_MEMCPY2D cpy = {}; - cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY; - CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array"); - - cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; - cpy.dstDevice = (CUdeviceptr) frame->data[i]; - cpy.dstPitch = frame->linesize[i]; - cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0); - cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0); + } else { + CUgraphicsResource resources[2] = {y_res.get(), uv_res.get()}; + CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA"); + + // Copy from the GL textures to the target CUDA frame + for (int i = 0; i < 2; i++) { + CUDA_MEMCPY2D cpy = {}; + cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY; + CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array"); + + cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.dstDevice = (CUdeviceptr) frame->data[i]; + cpy.dstPitch = frame->linesize[i]; + cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0); + cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0); + + CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); + } + // Unmap the textures to allow modification from GL again + CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); - CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); } - - // Unmap the textures to allow modification from GL again - CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); + return 0; } @@ -463,6 +506,7 @@ namespace cuda { egl::sws_t sws; egl::nv12_t nv12; + egl::yuv444_t yuv444; AVPixelFormat sw_format; int height; @@ -472,6 +516,8 @@ namespace cuda { egl::rgb_t rgb; registered_resource_t y_res; + registered_resource_t u_res; + registered_resource_t v_res; registered_resource_t uv_res; int offset_x; diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index 6cc2eb3bc3e..747421580c9 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -703,6 +703,68 @@ namespace egl { return nv12; } + //yuv444 version + std::optional import_target_yuv444( + display_t::pointer egl_display, + std::array &&fds, + const surface_descriptor_t &y, const surface_descriptor_t &u, const surface_descriptor_t &v) { + auto y_attribs = surface_descriptor_to_egl_attribs(y); + auto u_attribs = surface_descriptor_to_egl_attribs(u); + auto v_attribs = surface_descriptor_to_egl_attribs(v); + + yuv444_t yuv444 { + egl_display, + eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, y_attribs.data()), + eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, v_attribs.data()), + eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, v_attribs.data()), + gl::tex_t::make(3), + gl::frame_buf_t::make(3), + std::move(fds) + }; + + if (!yuv444->r8 || !yuv444->g8 || !yuv444->b8) { + BOOST_LOG(error) << "Couldn't import YUV target: "sv << util::hex(eglGetError()).to_string_view(); + + return std::nullopt; + } + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[0]); + if (!gl::egl_image_target_texture_2d()) { + BOOST_LOG(error) << "glEGLImageTargetTexture2DOES is not available; cannot import YUV DMA-BUF"sv; + return std::nullopt; + } + gl::egl_image_target_texture_2d()(GL_TEXTURE_2D, yuv444->r8); + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[1]); + gl::egl_image_target_texture_2d()(GL_TEXTURE_2D, yuv444->g8); + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[2]); + gl::egl_image_target_texture_2d()(GL_TEXTURE_2D, yuv444->b8); + + yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); + + GLenum attachments[] { + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1, + GL_COLOR_ATTACHMENT2 + }; + + for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + + const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; + const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; + gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); + } + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + + gl_drain_errors; + + return yuv444; + } + /** * @brief Create biplanar YUV textures to render into. * @param width Width of the target frame. @@ -710,7 +772,7 @@ namespace egl { * @param format Format of the target frame. * @return The new RGB texture. */ - std::optional create_target(int width, int height, AVPixelFormat format) { + std::optional create_nv12_target(int width, int height, AVPixelFormat format) { nv12_t nv12 { EGL_NO_DISPLAY, EGL_NO_IMAGE, @@ -764,6 +826,69 @@ namespace egl { return nv12; } + std::optional create_yuv444_target(int width, int height, AVPixelFormat format) { + + yuv444_t yuv444 { + EGL_NO_DISPLAY, + EGL_NO_IMAGE, + EGL_NO_IMAGE, + EGL_NO_IMAGE, + gl::tex_t::make(3), + gl::frame_buf_t::make(3), + }; + + GLint y_format; + GLint u_format; + GLint v_format; + + // Determine the size of each plane element + auto fmt_desc = av_pix_fmt_desc_get(format); + if (fmt_desc->comp[0].depth <= 8) { + y_format = GL_R8; + u_format = GL_R8; + v_format = GL_R8; + } else if (fmt_desc->comp[0].depth <= 16) { + y_format = GL_R16; + u_format = GL_R16; + v_format = GL_R16; + } else { + BOOST_LOG(error) << "Unsupported target pixel format: "sv << format; + return std::nullopt; + } + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[0]); + gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, y_format, width, height); + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[1]); + gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, u_format, width, height); + + gl::ctx.BindTexture(GL_TEXTURE_2D, yuv444->tex[2]); + gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, v_format, width, height); + + yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); + + GLenum attachments[] { + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1, + GL_COLOR_ATTACHMENT2 + }; + + for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + + const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; + const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; + gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); + } + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + + gl_drain_errors; + + return yuv444; + } + void sws_t::apply_colorspace(const video::sunshine_colorspace_t &colorspace) { auto color_p = video::color_vectors_from_colorspace(colorspace, true); @@ -779,9 +904,10 @@ namespace egl { program[0].bind(color_matrix); program[1].bind(color_matrix); + program[2].bind(color_matrix); } - std::optional sws_t::make(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex) { + std::optional sws_t::make_nv12(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex) { sws_t sws; sws.serial = std::numeric_limits::max(); @@ -909,7 +1035,135 @@ namespace egl { return sws; } - int sws_t::blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height) { + std::optional sws_t::make_yuv444(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex) { + sws_t sws; + + sws.serial = std::numeric_limits::max(); + + // Ensure aspect ratio is maintained + auto scalar = std::fminf(out_width / (float) in_width, out_height / (float) in_height); + auto out_width_f = in_width * scalar; + auto out_height_f = in_height * scalar; + + // result is always positive + auto offsetX_f = out_width - out_width_f; + auto offsetY_f = out_height - out_height_f; + + sws.out_width = out_width_f; + sws.out_height = out_height_f; + + sws.in_width = in_width; + sws.in_height = in_height; + + sws.offsetX = offsetX_f; + sws.offsetY = offsetY_f; + + { + const char *sources[] { + SUNSHINE_SHADERS_DIR "/Scene.vert", + SUNSHINE_SHADERS_DIR "/ConvertV.frag", + SUNSHINE_SHADERS_DIR "/ConvertU.frag", + SUNSHINE_SHADERS_DIR "/ConvertY.frag", + SUNSHINE_SHADERS_DIR "/Scene.frag", + }; + + GLenum shader_type[2] { + GL_FRAGMENT_SHADER, + GL_VERTEX_SHADER, + }; + + constexpr auto count = sizeof(sources) / sizeof(const char *); + + util::Either compiled_sources[count]; + + bool error_flag = false; + for (int x = 0; x < count; ++x) { + auto &compiled_source = compiled_sources[x]; + + int num = x == 0 ? 1 : 0; + compiled_source = gl::shader_t::compile(file_handler::read_file(sources[x]), shader_type[num]); + gl_drain_errors; + + if (compiled_source.has_right()) { + BOOST_LOG(error) << sources[x] << ": "sv << compiled_source.right(); + error_flag = true; + } + } + + if (error_flag) { + return std::nullopt; + } + + auto program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[4].left()); + if (program.has_right()) { + BOOST_LOG(error) << "GL linker (cursor shader): "sv << program.right(); + return std::nullopt; + } + + // Cursor - shader + sws.program[3] = std::move(program.left()); + + program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[1].left()); + if (program.has_right()) { + BOOST_LOG(error) << "GL linker (V - shader): "sv << program.right(); + return std::nullopt; + } + + // V - shader + sws.program[2] = std::move(program.left()); + + program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[2].left()); //HERE!! + if (program.has_right()) { + BOOST_LOG(error) << "GL linker (U - shader): "sv << program.right(); + return std::nullopt; + } + + // U - shader + sws.program[1] = std::move(program.left()); + + program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[3].left()); + if (program.has_right()) { + BOOST_LOG(error) << "GL linker (Y - shader): "sv << program.right(); + return std::nullopt; + } + + // Y - shader + sws.program[0] = std::move(program.left()); + } + + auto color_p = video::color_vectors_from_colorspace({video::colorspace_e::rec709, true, 8}, false); + std::pair members[] { + std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), + std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), + std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), + std::make_pair("range_y", util::view(color_p->range_y)), + std::make_pair("range_uv", util::view(color_p->range_uv)), + }; + + auto color_matrix = sws.program[0].uniform("ColorMatrix", members, sizeof(members) / sizeof(decltype(members[0]))); + if (!color_matrix) { + return std::nullopt; + } + + sws.color_matrix = std::move(*color_matrix); + + sws.tex = std::move(tex); + + sws.cursor_framebuffer = gl::frame_buf_t::make(1); + sws.cursor_framebuffer.bind(&sws.tex[0], &sws.tex[1]); + + sws.program[0].bind(sws.color_matrix); + sws.program[1].bind(sws.color_matrix); + sws.program[2].bind(sws.color_matrix); + + gl::ctx.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + gl_drain_errors; + + return sws; + } + + int sws_t::blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height, AVPixelFormat format) { auto f = [&]() { std::swap(offsetX, this->offsetX); std::swap(offsetY, this->offsetY); @@ -919,8 +1173,10 @@ namespace egl { f(); auto fg = util::fail_guard(f); - - return convert(fb); + if (format == AV_PIX_FMT_YUV444P) { + return convert_yuv444(fb); + } + return convert_nv12(fb); } std::optional sws_t::make(int in_width, int in_height, int out_width, int out_height, AVPixelFormat format) { @@ -954,17 +1210,27 @@ namespace egl { gl::ctx.BindTexture(GL_TEXTURE_2D, tex[0]); gl::ctx.TexStorage2D(GL_TEXTURE_2D, 1, gl_format, in_width, in_height); - return make(in_width, in_height, out_width, out_height, std::move(tex)); + if (format == AV_PIX_FMT_YUV444P) { + return make_yuv444(in_width, in_height, out_width, out_height, std::move(tex)); + } + return make_nv12(in_width, in_height, out_width, out_height, std::move(tex)); } - void sws_t::load_ram(platf::img_t &img) { + void sws_t::load_nv12_ram(platf::img_t &img) { loaded_texture = tex[0]; gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); gl::ctx.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.width, img.height, GL_BGRA, GL_UNSIGNED_BYTE, img.data); } - void sws_t::load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture) { + void sws_t::load_yuv444_ram(platf::img_t &img) { + loaded_texture = tex[0]; + + gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); + gl::ctx.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.width, img.height, GL_BGRA, GL_UNSIGNED_BYTE, img.data); + } + + void sws_t::load_nv12_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture) { // When only a sub-part of the image must be encoded... const bool copy = offset_x || offset_y || img.sd.width != in_width || img.sd.height != in_height; if (copy) { @@ -1023,7 +1289,65 @@ namespace egl { } } - int sws_t::convert(gl::frame_buf_t &fb) { + void sws_t::load_yuv444_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture) { + // When only a sub-part of the image must be encoded... + const bool copy = offset_x || offset_y || img.sd.width != in_width || img.sd.height != in_height; + if (copy) { + auto framebuf = gl::frame_buf_t::make(1); + framebuf.bind(&texture, &texture + 1); + + loaded_texture = tex[0]; + framebuf.copy(0, loaded_texture, offset_x, offset_y, in_width, in_height); + } else { + loaded_texture = texture; + } + if (img.data) { + GLenum attachment = GL_COLOR_ATTACHMENT0; + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, cursor_framebuffer[0]); + gl::ctx.UseProgram(program[3].handle()); + + // When a copy has already been made... + if (!copy) { + gl::ctx.BindTexture(GL_TEXTURE_2D, texture); + gl::ctx.DrawBuffers(1, &attachment); + + gl::ctx.Viewport(0, 0, in_width, in_height); + gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); + + loaded_texture = tex[0]; + } + + gl::ctx.BindTexture(GL_TEXTURE_2D, tex[1]); + if (serial != img.serial) { + serial = img.serial; + + gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, img.src_w, img.src_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, img.data); + } + + gl::ctx.Enable(GL_BLEND); + + gl::ctx.DrawBuffers(1, &attachment); + + #ifndef NDEBUG + auto status = gl::ctx.CheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + BOOST_LOG(error) << "Pass Cursor: CheckFramebufferStatus() --> [0x"sv << util::hex(status).to_string_view() << ']'; + return; + } + #endif + + gl::ctx.Viewport(img.x, img.y, img.width, img.height); + gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); + + gl::ctx.Disable(GL_BLEND); + + gl::ctx.BindTexture(GL_TEXTURE_2D, 0); + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + } + } + + int sws_t::convert_nv12(gl::frame_buf_t &fb) { gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); GLenum attachments[] { @@ -1054,6 +1378,39 @@ namespace egl { return 0; } + + int sws_t::convert_yuv444(gl::frame_buf_t &fb) { + gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); + + GLenum attachments[] { + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1, + GL_COLOR_ATTACHMENT2 + }; + + for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, fb[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + + #ifndef NDEBUG + auto status = gl::ctx.CheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + BOOST_LOG(error) << "Pass "sv << x << ": CheckFramebufferStatus() --> [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + #endif + + gl::ctx.UseProgram(program[x].handle()); + gl::ctx.Viewport(offsetX, offsetY, out_width, out_height); + gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); + } + + gl::ctx.BindTexture(GL_TEXTURE_2D, 0); + + gl::ctx.Flush(); + + return 0; + } } // namespace egl void free_frame(AVFrame *frame) { diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 286a700bf25..3bf556d44bd 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -210,6 +210,21 @@ namespace egl { std::array fds; }; + struct yuv444_img_t { + display_t::pointer display; + EGLImage r8; + EGLImage g8; + EGLImage b8; + + gl::tex_t tex; + gl::frame_buf_t buf; + + // sizeof(va::DRMPRIMESurfaceDescriptor::objects) / sizeof(va::DRMPRIMESurfaceDescriptor::objects[0]); + static constexpr std::size_t num_fds = 4; + + std::array fds; + }; + KITTY_USING_MOVE_T(rgb_t, rgb_img_t, , { if (el.xrgb8) { eglDestroyImage(el.display, el.xrgb8); @@ -226,6 +241,20 @@ namespace egl { } }); + KITTY_USING_MOVE_T(yuv444_t, yuv444_img_t, , { + if (el.r8) { + eglDestroyImage(el.display, el.r8); + } + + if (el.g8) { + eglDestroyImage(el.display, el.g8); + } + + if (el.b8) { + eglDestroyImage(el.display, el.b8); + } + }); + KITTY_USING_MOVE_T(ctx_t, (std::tuple), , { TUPLE_2D_REF(disp, ctx, el); if (ctx) { @@ -262,6 +291,14 @@ namespace egl { const surface_descriptor_t &uv ); + std::optional import_target( + display_t::pointer egl_display, + std::array &&fds, + const surface_descriptor_t &y, + const surface_descriptor_t &u, + const surface_descriptor_t &v + ); + /** * @brief Creates biplanar YUV textures to render into. * @param width Width of the target frame. @@ -269,7 +306,9 @@ namespace egl { * @param format Format of the target frame. * @return The new RGB texture. */ - std::optional create_target(int width, int height, AVPixelFormat format); + std::optional create_nv12_target(int width, int height, AVPixelFormat format); + + std::optional create_yuv444_target(int width, int height, AVPixelFormat format); class cursor_t: public platf::img_t { public: @@ -317,17 +356,24 @@ namespace egl { class sws_t { public: - static std::optional make(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex); + static std::optional make_nv12(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex); + static std::optional make_yuv444(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex); static std::optional make(int in_width, int in_height, int out_width, int out_height, AVPixelFormat format); // Convert the loaded image into the first two framebuffers - int convert(gl::frame_buf_t &fb); + int convert_nv12(gl::frame_buf_t &fb); + + // Convert the loaded image into the first three framebuffers + int convert_yuv444(gl::frame_buf_t &fb); // Make an area of the image black - int blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height); + int blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height, AVPixelFormat format); + + void load_nv12_ram(platf::img_t &img); + void load_nv12_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); - void load_ram(platf::img_t &img); - void load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); + void load_yuv444_ram(platf::img_t &img); + void load_yuv444_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); void apply_colorspace(const video::sunshine_colorspace_t &colorspace); @@ -339,8 +385,9 @@ namespace egl { gl::frame_buf_t cursor_framebuffer; gl::frame_buf_t copy_framebuffer; - // Y - shader, UV - shader, Cursor - shader - gl::program_t program[3]; + // Y - shader, UV - shader, Cursor - shader : for nv12 + // Y - shader, U - shader, V - shader, Cursor - shader : for yuv444 + gl::program_t program[4]; gl::buffer_t color_matrix; int out_width; diff --git a/src/platform/linux/vaapi.cpp b/src/platform/linux/vaapi.cpp index 525ef76b6fb..107643da6f5 100644 --- a/src/platform/linux/vaapi.cpp +++ b/src/platform/linux/vaapi.cpp @@ -405,9 +405,9 @@ namespace va { class va_ram_t: public va_t { public: int convert(platf::img_t &img) override { - sws.load_ram(img); + sws.load_nv12_ram(img); - sws.convert(nv12->buf); + sws.convert_nv12(nv12->buf); return 0; } }; @@ -434,9 +434,9 @@ namespace va { rgb = std::move(*rgb_opt); } - sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.load_nv12_vram(descriptor, offset_x, offset_y, rgb->tex[0]); - sws.convert(nv12->buf); + sws.convert_nv12(nv12->buf); return 0; } diff --git a/src_assets/linux/assets/shaders/opengl/ConvertU.frag b/src_assets/linux/assets/shaders/opengl/ConvertU.frag new file mode 100644 index 00000000000..35d08740c07 --- /dev/null +++ b/src_assets/linux/assets/shaders/opengl/ConvertU.frag @@ -0,0 +1,26 @@ +#version 300 es + +#ifdef GL_ES +precision lowp float; +#endif + +uniform sampler2D image; + +layout(shared) uniform ColorMatrix { + vec4 color_vec_y; + vec4 color_vec_u; + vec4 color_vec_v; + vec2 range_y; + vec2 range_uv; +}; + +in vec2 tex; +layout(location = 0) out float color; + +void main() +{ + vec3 rgb = texture(image, tex).rgb; + float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; + + color = u * range_uv.x + range_uv.y; +} \ No newline at end of file diff --git a/src_assets/linux/assets/shaders/opengl/ConvertV.frag b/src_assets/linux/assets/shaders/opengl/ConvertV.frag new file mode 100644 index 00000000000..777e4f3703a --- /dev/null +++ b/src_assets/linux/assets/shaders/opengl/ConvertV.frag @@ -0,0 +1,26 @@ +#version 300 es + +#ifdef GL_ES +precision lowp float; +#endif + +uniform sampler2D image; + +layout(shared) uniform ColorMatrix { + vec4 color_vec_y; + vec4 color_vec_u; + vec4 color_vec_v; + vec2 range_y; + vec2 range_uv; +}; + +in vec2 tex; +layout(location = 0) out float color; + +void main() +{ + vec3 rgb = texture(image, tex).rgb; + float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + + color = v * range_uv.x + range_uv.y; +} \ No newline at end of file From dd9f4a7dd4f3e7f26c9f9fb52ef48507e60ffa48 Mon Sep 17 00:00:00 2001 From: sheynar Date: Mon, 13 Apr 2026 18:27:39 +0300 Subject: [PATCH 3/7] fix codegate issues/readability --- src/nvhttp.cpp | 64 +++++++++++++++------------- src/platform/linux/graphics.cpp | 22 ++++------ src/platform/linux/graphics.h | 3 +- src/video.cpp | 74 +++++++++++++++------------------ 4 files changed, 76 insertions(+), 87 deletions(-) diff --git a/src/nvhttp.cpp b/src/nvhttp.cpp index fb3eb6a534a..84ed0e97164 100644 --- a/src/nvhttp.cpp +++ b/src/nvhttp.cpp @@ -680,6 +680,39 @@ namespace nvhttp { return true; } + uint32_t get_codec_mode_flags() { + uint32_t codec_mode_flags = SCM_H264; + if (video::last_encoder_probe_supported_yuv444_for_codec[0]) { + codec_mode_flags |= SCM_H264_HIGH8_444; + } + if (video::active_hevc_mode >= 2) { + codec_mode_flags |= SCM_HEVC; + if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { + codec_mode_flags |= SCM_HEVC_REXT8_444; + } + } + if (video::active_hevc_mode == 3 || video::active_hevc_mode == 5) { + codec_mode_flags |= SCM_HEVC_MAIN10; + } + if ((video::active_hevc_mode == 4 || video::active_hevc_mode == 5) && video::last_encoder_probe_supported_yuv444_for_codec[1]) { + codec_mode_flags |= SCM_HEVC_REXT10_444; + } + + if (video::active_av1_mode >= 2) { + codec_mode_flags |= SCM_AV1_MAIN8; + if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { + codec_mode_flags |= SCM_AV1_HIGH8_444; + } + } + if (video::active_av1_mode == 3 || video::active_av1_mode == 5) { + codec_mode_flags |= SCM_AV1_MAIN10; + } + if ((video::active_av1_mode == 4 || video::active_av1_mode == 5) && video::last_encoder_probe_supported_yuv444_for_codec[2]) { + codec_mode_flags |= SCM_AV1_HIGH10_444; + } + return codec_mode_flags; + } + template void serverinfo(std::shared_ptr::Response> response, std::shared_ptr::Request> request) { print_req(request); @@ -731,36 +764,7 @@ namespace nvhttp { tree.put("root.LocalIP", net::addr_to_normalized_string(local_endpoint.address())); } - uint32_t codec_mode_flags = SCM_H264; - if (video::last_encoder_probe_supported_yuv444_for_codec[0]) { - codec_mode_flags |= SCM_H264_HIGH8_444; - } - if (video::active_hevc_mode >= 2) { - codec_mode_flags |= SCM_HEVC; - if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { - codec_mode_flags |= SCM_HEVC_REXT8_444; - } - } - if (video::active_hevc_mode == 3 || video::active_hevc_mode == 5) { - codec_mode_flags |= SCM_HEVC_MAIN10; - } - if (video::active_hevc_mode == 4 || video::active_hevc_mode == 5) { - if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { - codec_mode_flags |= SCM_HEVC_REXT10_444; - } - } - if (video::active_av1_mode >= 2) { - codec_mode_flags |= SCM_AV1_MAIN8; - if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { - codec_mode_flags |= SCM_AV1_HIGH8_444; - } - } - if (video::active_av1_mode >= 3) { - codec_mode_flags |= SCM_AV1_MAIN10; - if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { - codec_mode_flags |= SCM_AV1_HIGH10_444; - } - } + const uint32_t codec_mode_flags = get_codec_mode_flags(); tree.put("root.ServerCodecModeSupport", codec_mode_flags); if (!config::nvhttp.external_ip.empty()) { diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index 747421580c9..4ca713d77c9 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -651,6 +651,10 @@ namespace egl { return rgb; } + //constants for clear black color Y, U, V. U & V are same so: + const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; + const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; + std::optional import_target(display_t::pointer egl_display, std::array &&fds, const surface_descriptor_t &y, const surface_descriptor_t &uv) { auto y_attribs = surface_descriptor_to_egl_attribs(y); auto uv_attribs = surface_descriptor_to_egl_attribs(uv); @@ -690,9 +694,6 @@ namespace egl { for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); - - const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; - const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); } @@ -752,9 +753,6 @@ namespace egl { for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); - - const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; - const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); } @@ -813,9 +811,6 @@ namespace egl { for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); - - const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; - const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); } @@ -876,9 +871,6 @@ namespace egl { for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); - - const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; - const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); } @@ -1163,10 +1155,10 @@ namespace egl { return sws; } - int sws_t::blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height, AVPixelFormat format) { + int sws_t::blank(gl::frame_buf_t &fb, int offsetX_, int offsetY_, int width, int height, AVPixelFormat format) { auto f = [&]() { - std::swap(offsetX, this->offsetX); - std::swap(offsetY, this->offsetY); + std::swap(offsetX_, this->offsetX); + std::swap(offsetY_, this->offsetY); std::swap(width, this->out_width); std::swap(height, this->out_height); }; diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 3bf556d44bd..54b396a592d 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -219,7 +219,6 @@ namespace egl { gl::tex_t tex; gl::frame_buf_t buf; - // sizeof(va::DRMPRIMESurfaceDescriptor::objects) / sizeof(va::DRMPRIMESurfaceDescriptor::objects[0]); static constexpr std::size_t num_fds = 4; std::array fds; @@ -367,7 +366,7 @@ namespace egl { int convert_yuv444(gl::frame_buf_t &fb); // Make an area of the image black - int blank(gl::frame_buf_t &fb, int offsetX, int offsetY, int width, int height, AVPixelFormat format); + int blank(gl::frame_buf_t &fb, int offsetX_, int offsetY_, int width, int height, AVPixelFormat format); void load_nv12_ram(platf::img_t &img); void load_nv12_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); diff --git a/src/video.cpp b/src/video.cpp index 56cac35de6a..5f97c477484 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -2734,36 +2734,19 @@ namespace video { // Test HDR and YUV444 support { - // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth - if (encoder.flags & YUV444_SUPPORT) { - config_t config_h264_yuv444 {1920, 1080, 60, 6000, 1000, 1, 0, 1, 0, 0, 1}; - encoder.h264[encoder_t::YUV444] = disp->is_codec_supported(encoder.h264.name, config_h264_yuv444) && - validate_config(disp, encoder, config_h264_yuv444) >= 0; - } else { - encoder.h264[encoder_t::YUV444] = false; - } - auto test_yuv444 = [&](auto &flag_map, auto video_format) { - const config_t sdr_yuv444_config = {1920, 1080, 60, 6000, 1000, 1, 0, 1, 1, 0, 1}; - - auto config = sdr_yuv444_config; + const config_t config = {1920, 1080, 60, 6000, 1000, 1, 0, 1, video_format, 0, 1}; - // Reset the display reset_display(disp, encoder.platform_formats->dev_type, output_name, config); if (!disp) { return; } - - config.videoFormat = video_format; - if (!flag_map[encoder_t::PASSED]) { return; } auto encoder_codec_name = encoder.codec_from_config(config).name; - // Test 4:4:4 SDR first - config.chromaSamplingType = 1; if ((encoder.flags & YUV444_SUPPORT) && disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { @@ -2773,28 +2756,39 @@ namespace video { } }; - auto test_hdr = [&](auto &flag_map, auto video_format) { - - const config_t generic_hdr_config = {1920, 1080, 60, 6000, 1000, 1, 0, 3, 1, 1, 0}; - - auto config = generic_hdr_config; + auto test_yuv420_hdr = [&](auto &flag_map, auto video_format) { + const config_t config = {1920, 1080, 60, 6000, 1000, 1, 0, 3, video_format, 1, 0}; - // Reset the display reset_display(disp, encoder.platform_formats->dev_type, output_name, config); if (!disp) { return; } + if (!flag_map[encoder_t::PASSED]) { + return; + } + + auto encoder_codec_name = encoder.codec_from_config(config).name; + + if (disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { + flag_map[encoder_t::DYNAMIC_RANGE] = true; + } else { + flag_map[encoder_t::DYNAMIC_RANGE] = false; + } + }; - config.videoFormat = video_format; + auto test_yuv444_hdr = [&](auto &flag_map, auto video_format) { + const config_t config = {1920, 1080, 60, 6000, 1000, 1, 0, 3, video_format, 1, 1}; + reset_display(disp, encoder.platform_formats->dev_type, output_name, config); + if (!disp) { + return; + } if (!flag_map[encoder_t::PASSED]) { return; } auto encoder_codec_name = encoder.codec_from_config(config).name; - // Test 4:4:4 HDR first. - config.chromaSamplingType = 1; if ((encoder.flags & YUV444_SUPPORT) && disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { @@ -2802,24 +2796,19 @@ namespace video { } else { flag_map[encoder_t::DYNAMIC_RANGE_YUV444] = false; } - - // Test 4:2:0 HDR - config.chromaSamplingType = 0; - if (disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { - flag_map[encoder_t::DYNAMIC_RANGE] = true; - } else { - flag_map[encoder_t::DYNAMIC_RANGE] = false; - } }; + test_yuv444(encoder.h264, 0); // HDR is not supported with H.264. Don't bother even trying it. encoder.h264[encoder_t::DYNAMIC_RANGE] = false; encoder.h264[encoder_t::DYNAMIC_RANGE_YUV444] = false; test_yuv444(encoder.hevc, 1); - test_hdr(encoder.hevc, 1); + test_yuv420_hdr(encoder.hevc, 1); + test_yuv444_hdr(encoder.hevc, 1); test_yuv444(encoder.av1, 2); - test_hdr(encoder.av1, 2); + test_yuv420_hdr(encoder.av1, 2); + test_yuv444_hdr(encoder.av1, 2); } encoder.h264[encoder_t::VUI_PARAMETERS] = encoder.h264[encoder_t::VUI_PARAMETERS] && !config::sunshine.flags[config::flag::FORCE_VIDEO_HEADER_REPLACE]; @@ -2856,7 +2845,7 @@ namespace video { active_av1_mode = config::video.av1_mode; last_encoder_probe_supported_ref_frames_invalidation = false; - auto adjust_encoder_constraints = [&](encoder_t *encoder) { + auto adjust_encoder_constraints_hevc = [&](encoder_t *encoder) { // If we can't satisfy both the encoder and codec requirement, prefer the encoder over codec support if (active_hevc_mode == 5 && !encoder->hevc[encoder_t::DYNAMIC_RANGE] && !encoder->hevc[encoder_t::DYNAMIC_RANGE_YUV444]) { BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support HEVC Main10 Rext10_444 on this system"sv; @@ -2871,7 +2860,10 @@ namespace video { BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support HEVC on this system"sv; active_hevc_mode = 0; } + }; + auto adjust_encoder_constraints_av1 = [&](encoder_t *encoder) { + // If we can't satisfy both the encoder and codec requirement, prefer the encoder over codec support if (active_av1_mode == 5 && !encoder->av1[encoder_t::DYNAMIC_RANGE] && !encoder->av1[encoder_t::DYNAMIC_RANGE_YUV444]) { BOOST_LOG(warning) << "Encoder ["sv << encoder->name << "] does not support AV1 Main10 Rext10_444 on this system"sv; active_av1_mode = 0; @@ -2900,7 +2892,8 @@ namespace video { } // We will return an encoder here even if it fails one of the codec requirements specified by the user - adjust_encoder_constraints(encoder); + adjust_encoder_constraints_hevc(encoder); + adjust_encoder_constraints_av1(encoder); chosen_encoder = encoder; break; @@ -2979,7 +2972,8 @@ namespace video { } // We will return an encoder here even if it fails one of the codec requirements specified by the user - adjust_encoder_constraints(encoder); + adjust_encoder_constraints_hevc(encoder); + adjust_encoder_constraints_av1(encoder); chosen_encoder = encoder; break; From 7fad32e97c62fb6db4d5492883c374989fc190a6 Mon Sep 17 00:00:00 2001 From: Sheynar Date: Mon, 13 Apr 2026 20:53:16 +0300 Subject: [PATCH 4/7] fix codegate - load_ram/load_vram duplication --- src/platform/linux/cuda.cpp | 4 +- src/platform/linux/graphics.cpp | 79 ++++----------------------------- src/platform/linux/graphics.h | 7 +-- src/platform/linux/vaapi.cpp | 4 +- 4 files changed, 15 insertions(+), 79 deletions(-) diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 313969fe047..29154172a91 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -430,10 +430,10 @@ namespace cuda { // Perform the color conversion and scaling in GL if (sw_format == AV_PIX_FMT_YUV444P) { - sws.load_yuv444_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], true); sws.convert_yuv444(yuv444->buf); } else { - sws.load_nv12_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], false); sws.convert_nv12(nv12->buf); } diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index 4ca713d77c9..261dfdda691 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -716,7 +716,7 @@ namespace egl { yuv444_t yuv444 { egl_display, eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, y_attribs.data()), - eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, v_attribs.data()), + eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, u_attribs.data()), eglCreateImage(egl_display, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, nullptr, v_attribs.data()), gl::tex_t::make(3), gl::frame_buf_t::make(3), @@ -1104,7 +1104,7 @@ namespace egl { // V - shader sws.program[2] = std::move(program.left()); - program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[2].left()); //HERE!! + program = gl::program_t::link(compiled_sources[0].left(), compiled_sources[2].left()); if (program.has_right()) { BOOST_LOG(error) << "GL linker (U - shader): "sv << program.right(); return std::nullopt; @@ -1208,21 +1208,14 @@ namespace egl { return make_nv12(in_width, in_height, out_width, out_height, std::move(tex)); } - void sws_t::load_nv12_ram(platf::img_t &img) { - loaded_texture = tex[0]; - - gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); - gl::ctx.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.width, img.height, GL_BGRA, GL_UNSIGNED_BYTE, img.data); - } - - void sws_t::load_yuv444_ram(platf::img_t &img) { + void sws_t::load_ram(platf::img_t &img) { loaded_texture = tex[0]; gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); gl::ctx.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.width, img.height, GL_BGRA, GL_UNSIGNED_BYTE, img.data); } - void sws_t::load_nv12_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture) { + void sws_t::load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture, bool is_yuv444) { // When only a sub-part of the image must be encoded... const bool copy = offset_x || offset_y || img.sd.width != in_width || img.sd.height != in_height; if (copy) { @@ -1239,7 +1232,11 @@ namespace egl { GLenum attachment = GL_COLOR_ATTACHMENT0; gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, cursor_framebuffer[0]); - gl::ctx.UseProgram(program[2].handle()); + + //nv12 cursor program[2] + //yuv444 cursor program[3] + const int cursor_program = is_yuv444 ? 3 : 2; + gl::ctx.UseProgram(program[cursor_program].handle()); // When a copy has already been made... if (!copy) { @@ -1281,64 +1278,6 @@ namespace egl { } } - void sws_t::load_yuv444_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture) { - // When only a sub-part of the image must be encoded... - const bool copy = offset_x || offset_y || img.sd.width != in_width || img.sd.height != in_height; - if (copy) { - auto framebuf = gl::frame_buf_t::make(1); - framebuf.bind(&texture, &texture + 1); - - loaded_texture = tex[0]; - framebuf.copy(0, loaded_texture, offset_x, offset_y, in_width, in_height); - } else { - loaded_texture = texture; - } - if (img.data) { - GLenum attachment = GL_COLOR_ATTACHMENT0; - - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, cursor_framebuffer[0]); - gl::ctx.UseProgram(program[3].handle()); - - // When a copy has already been made... - if (!copy) { - gl::ctx.BindTexture(GL_TEXTURE_2D, texture); - gl::ctx.DrawBuffers(1, &attachment); - - gl::ctx.Viewport(0, 0, in_width, in_height); - gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); - - loaded_texture = tex[0]; - } - - gl::ctx.BindTexture(GL_TEXTURE_2D, tex[1]); - if (serial != img.serial) { - serial = img.serial; - - gl::ctx.TexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, img.src_w, img.src_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, img.data); - } - - gl::ctx.Enable(GL_BLEND); - - gl::ctx.DrawBuffers(1, &attachment); - - #ifndef NDEBUG - auto status = gl::ctx.CheckFramebufferStatus(GL_FRAMEBUFFER); - if (status != GL_FRAMEBUFFER_COMPLETE) { - BOOST_LOG(error) << "Pass Cursor: CheckFramebufferStatus() --> [0x"sv << util::hex(status).to_string_view() << ']'; - return; - } - #endif - - gl::ctx.Viewport(img.x, img.y, img.width, img.height); - gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); - - gl::ctx.Disable(GL_BLEND); - - gl::ctx.BindTexture(GL_TEXTURE_2D, 0); - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); - } - } - int sws_t::convert_nv12(gl::frame_buf_t &fb) { gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 54b396a592d..b17bcc56543 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -368,11 +368,8 @@ namespace egl { // Make an area of the image black int blank(gl::frame_buf_t &fb, int offsetX_, int offsetY_, int width, int height, AVPixelFormat format); - void load_nv12_ram(platf::img_t &img); - void load_nv12_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); - - void load_yuv444_ram(platf::img_t &img); - void load_yuv444_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture); + void load_ram(platf::img_t &img); + void load_vram(img_descriptor_t &img, int offset_x, int offset_y, int texture, bool is_yuv444); void apply_colorspace(const video::sunshine_colorspace_t &colorspace); diff --git a/src/platform/linux/vaapi.cpp b/src/platform/linux/vaapi.cpp index 107643da6f5..4f1d6cadfcf 100644 --- a/src/platform/linux/vaapi.cpp +++ b/src/platform/linux/vaapi.cpp @@ -405,7 +405,7 @@ namespace va { class va_ram_t: public va_t { public: int convert(platf::img_t &img) override { - sws.load_nv12_ram(img); + sws.load_ram(img); sws.convert_nv12(nv12->buf); return 0; @@ -434,7 +434,7 @@ namespace va { rgb = std::move(*rgb_opt); } - sws.load_nv12_vram(descriptor, offset_x, offset_y, rgb->tex[0]); + sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], false); sws.convert_nv12(nv12->buf); return 0; From 566ed87515629317d9ec84b3831c406ff235b241 Mon Sep 17 00:00:00 2001 From: sheynar Date: Tue, 14 Apr 2026 20:27:31 +0300 Subject: [PATCH 5/7] fix codegate - std::array from c style[] --- src/platform/linux/cuda.cpp | 12 +++---- src/platform/linux/graphics.cpp | 64 ++++++++++++++++----------------- src/platform/linux/graphics.h | 2 +- 3 files changed, 38 insertions(+), 40 deletions(-) diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 29154172a91..fd69b6b4359 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -442,8 +442,8 @@ namespace cuda { if (sw_format == AV_PIX_FMT_YUV444P) { // Map the GL textures to read for CUDA - CUgraphicsResource resources[3] = {y_res.get(), u_res.get(), v_res.get()}; - CU_CHECK(cdf->cuGraphicsMapResources(3, resources, stream.get()), "Couldn't map GL textures in CUDA"); + std::array resources = {{y_res.get(), u_res.get(), v_res.get()}}; + CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA"); // Copy from the GL textures to the target CUDA frame for (int i = 0; i < 3; i++) { @@ -460,11 +460,11 @@ namespace cuda { CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); } // Unmap the textures to allow modification from GL again - CU_CHECK(cdf->cuGraphicsUnmapResources(3, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); + CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA"); } else { - CUgraphicsResource resources[2] = {y_res.get(), uv_res.get()}; - CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA"); + std::array resources = {{y_res.get(), uv_res.get()}}; + CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA"); // Copy from the GL textures to the target CUDA frame for (int i = 0; i < 2; i++) { @@ -481,7 +481,7 @@ namespace cuda { CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame"); } // Unmap the textures to allow modification from GL again - CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA"); + CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA"); } diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index 261dfdda691..17671326b04 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -686,12 +686,12 @@ namespace egl { nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex)); - GLenum attachments[] { + constexpr std::array attachments {{ GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 - }; + }}; - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + for (size_t x = 0; x < attachments.size(); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); @@ -744,13 +744,13 @@ namespace egl { yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); - GLenum attachments[] { + constexpr std::array attachments {{ GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 - }; + }}; - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + for (size_t x = 0; x < attachments.size(); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); @@ -803,12 +803,12 @@ namespace egl { nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex)); - GLenum attachments[] { + constexpr std::array attachments {{ GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 - }; + }}; - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + for (size_t x = 0; x < attachments.size(); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); @@ -862,13 +862,13 @@ namespace egl { yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); - GLenum attachments[] { + constexpr std::array attachments {{ GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 - }; + }}; - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + for (size_t x = 0; x < attachments.size(); ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); gl::ctx.DrawBuffers(1, &attachments[x]); gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); @@ -925,25 +925,24 @@ namespace egl { auto width_i = 1.0f / sws.out_width; { - const char *sources[] { + constexpr std::array sources {{ SUNSHINE_SHADERS_DIR "/ConvertUV.frag", SUNSHINE_SHADERS_DIR "/ConvertUV.vert", SUNSHINE_SHADERS_DIR "/ConvertY.frag", SUNSHINE_SHADERS_DIR "/Scene.vert", SUNSHINE_SHADERS_DIR "/Scene.frag", - }; + }}; - GLenum shader_type[2] { + constexpr std::array shader_type {{ GL_FRAGMENT_SHADER, GL_VERTEX_SHADER, - }; - - constexpr auto count = sizeof(sources) / sizeof(const char *); + }}; - util::Either compiled_sources[count]; + constexpr auto count = sources.size(); + std::array, count> compiled_sources; bool error_flag = false; - for (int x = 0; x < count; ++x) { + for (size_t x = 0; x < count; ++x) { auto &compiled_source = compiled_sources[x]; compiled_source = gl::shader_t::compile(file_handler::read_file(sources[x]), shader_type[x % 2]); @@ -997,15 +996,15 @@ namespace egl { gl::ctx.Uniform1fv(loc_width_i, 1, &width_i); auto color_p = video::color_vectors_from_colorspace({video::colorspace_e::rec601, false, 8}, true); - std::pair members[] { + std::array, 5> members {{ std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), std::make_pair("range_y", util::view(color_p->range_y)), std::make_pair("range_uv", util::view(color_p->range_uv)), - }; + }}; - auto color_matrix = sws.program[0].uniform("ColorMatrix", members, sizeof(members) / sizeof(decltype(members[0]))); + auto color_matrix = sws.program[0].uniform("ColorMatrix", members.data(), members.size()); if (!color_matrix) { return std::nullopt; } @@ -1051,22 +1050,21 @@ namespace egl { sws.offsetY = offsetY_f; { - const char *sources[] { + constexpr std::array sources {{ SUNSHINE_SHADERS_DIR "/Scene.vert", SUNSHINE_SHADERS_DIR "/ConvertV.frag", SUNSHINE_SHADERS_DIR "/ConvertU.frag", SUNSHINE_SHADERS_DIR "/ConvertY.frag", SUNSHINE_SHADERS_DIR "/Scene.frag", - }; + }}; - GLenum shader_type[2] { + constexpr std::array shader_type {{ GL_FRAGMENT_SHADER, GL_VERTEX_SHADER, - }; + }}; - constexpr auto count = sizeof(sources) / sizeof(const char *); - - util::Either compiled_sources[count]; + constexpr auto count = sources.size(); + std::array, count> compiled_sources; bool error_flag = false; for (int x = 0; x < count; ++x) { @@ -1124,15 +1122,15 @@ namespace egl { } auto color_p = video::color_vectors_from_colorspace({video::colorspace_e::rec709, true, 8}, false); - std::pair members[] { + std::array, 5> members {{ std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), std::make_pair("range_y", util::view(color_p->range_y)), std::make_pair("range_uv", util::view(color_p->range_uv)), - }; + }}; - auto color_matrix = sws.program[0].uniform("ColorMatrix", members, sizeof(members) / sizeof(decltype(members[0]))); + auto color_matrix = sws.program[0].uniform("ColorMatrix", members.data(), members.size()); if (!color_matrix) { return std::nullopt; } diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index b17bcc56543..7061430c227 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -383,7 +383,7 @@ namespace egl { // Y - shader, UV - shader, Cursor - shader : for nv12 // Y - shader, U - shader, V - shader, Cursor - shader : for yuv444 - gl::program_t program[4]; + std::array program; gl::buffer_t color_matrix; int out_width; From 0759bbc464644a6aebcc13ec29cd29caaa56d07f Mon Sep 17 00:00:00 2001 From: sheynar Date: Tue, 14 Apr 2026 21:09:22 +0300 Subject: [PATCH 6/7] fix codegate - commentary readability --- src/platform/linux/graphics.cpp | 104 +++++++++++++------------------- 1 file changed, 42 insertions(+), 62 deletions(-) diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index 17671326b04..b8b24771d91 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -651,10 +651,46 @@ namespace egl { return rgb; } - //constants for clear black color Y, U, V. U & V are same so: + // Constants for clear black color Y, U, V. U & V are same so: const float y_black[] = {0.0f, 0.0f, 0.0f, 0.0f}; const float uv_black[] = {0.5f, 0.5f, 0.5f, 0.5f}; + void nv12_bind_framebuffers(nv12_t &nv12) { + constexpr std::array attachments {{ + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1 + }}; + + for (size_t x = 0; x < attachments.size(); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); + } + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + + gl_drain_errors; + } + + void yuv44_bind_framebuffers(yuv444_t &yuv444) { + + constexpr std::array attachments {{ + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1, + GL_COLOR_ATTACHMENT2 + }}; + + for (size_t x = 0; x < attachments.size(); ++x) { + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); + gl::ctx.DrawBuffers(1, &attachments[x]); + gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); + } + + gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); + + gl_drain_errors; + } + std::optional import_target(display_t::pointer egl_display, std::array &&fds, const surface_descriptor_t &y, const surface_descriptor_t &uv) { auto y_attribs = surface_descriptor_to_egl_attribs(y); auto uv_attribs = surface_descriptor_to_egl_attribs(uv); @@ -686,25 +722,11 @@ namespace egl { nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex)); - constexpr std::array attachments {{ - GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1 - }}; - - for (size_t x = 0; x < attachments.size(); ++x) { - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); - gl::ctx.DrawBuffers(1, &attachments[x]); - gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); - } - - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); - - gl_drain_errors; + nv12_bind_framebuffers(nv12); return nv12; } - //yuv444 version std::optional import_target_yuv444( display_t::pointer egl_display, std::array &&fds, @@ -744,21 +766,7 @@ namespace egl { yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); - constexpr std::array attachments {{ - GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1, - GL_COLOR_ATTACHMENT2 - }}; - - for (size_t x = 0; x < attachments.size(); ++x) { - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); - gl::ctx.DrawBuffers(1, &attachments[x]); - gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); - } - - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); - - gl_drain_errors; + yuv44_bind_framebuffers(yuv444); return yuv444; } @@ -803,20 +811,7 @@ namespace egl { nv12->buf.bind(std::begin(nv12->tex), std::end(nv12->tex)); - constexpr std::array attachments {{ - GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1 - }}; - - for (size_t x = 0; x < attachments.size(); ++x) { - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, nv12->buf[x]); - gl::ctx.DrawBuffers(1, &attachments[x]); - gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); - } - - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); - - gl_drain_errors; + nv12_bind_framebuffers(nv12); return nv12; } @@ -862,21 +857,7 @@ namespace egl { yuv444->buf.bind(std::begin(yuv444->tex), std::end(yuv444->tex)); - constexpr std::array attachments {{ - GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1, - GL_COLOR_ATTACHMENT2 - }}; - - for (size_t x = 0; x < attachments.size(); ++x) { - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, yuv444->buf[x]); - gl::ctx.DrawBuffers(1, &attachments[x]); - gl::ctx.ClearBufferfv(GL_COLOR, 0, x == 0 ? y_black : uv_black); - } - - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, 0); - - gl_drain_errors; + yuv44_bind_framebuffers(yuv444); return yuv444; } @@ -1231,8 +1212,7 @@ namespace egl { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, cursor_framebuffer[0]); - //nv12 cursor program[2] - //yuv444 cursor program[3] + // For NV12 cursor program index is 2, for YUV444 it's 3 const int cursor_program = is_yuv444 ? 3 : 2; gl::ctx.UseProgram(program[cursor_program].handle()); From 500507282ac0b6051f6aca74acdae1e5078de874 Mon Sep 17 00:00:00 2001 From: sheynar Date: Sun, 26 Apr 2026 17:17:55 +0300 Subject: [PATCH 7/7] fix codegate - code duplications --- src/platform/linux/graphics.cpp | 134 +++++++++++++++----------------- src/platform/linux/graphics.h | 3 + 2 files changed, 66 insertions(+), 71 deletions(-) diff --git a/src/platform/linux/graphics.cpp b/src/platform/linux/graphics.cpp index b8b24771d91..7e6178c8120 100644 --- a/src/platform/linux/graphics.cpp +++ b/src/platform/linux/graphics.cpp @@ -880,6 +880,39 @@ namespace egl { program[2].bind(color_matrix); } + int configure_sws_pipeline(sws_t &sws, const video::color_t *color_p, gl::tex_t &&tex, bool is_yuv444) { + std::array, 5> members {{ + std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), + std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), + std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), + std::make_pair("range_y", util::view(color_p->range_y)), + std::make_pair("range_uv", util::view(color_p->range_uv)), + }}; + + auto color_matrix = sws.program[0].uniform("ColorMatrix", members.data(), members.size()); + if (!color_matrix) { + return -1; + } + + sws.color_matrix = std::move(*color_matrix); + + sws.tex = std::move(tex); + + sws.cursor_framebuffer = gl::frame_buf_t::make(1); + sws.cursor_framebuffer.bind(&sws.tex[0], &sws.tex[1]); + + int programCount = is_yuv444 ? 3 : 2; + + for (int i = 0; i < programCount; i++) { + sws.program[i].bind(sws.color_matrix); + } + + gl::ctx.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + gl_drain_errors; + return 0; + } + std::optional sws_t::make_nv12(int in_width, int in_height, int out_width, int out_height, gl::tex_t &&tex) { sws_t sws; @@ -977,33 +1010,12 @@ namespace egl { gl::ctx.Uniform1fv(loc_width_i, 1, &width_i); auto color_p = video::color_vectors_from_colorspace({video::colorspace_e::rec601, false, 8}, true); - std::array, 5> members {{ - std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), - std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), - std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), - std::make_pair("range_y", util::view(color_p->range_y)), - std::make_pair("range_uv", util::view(color_p->range_uv)), - }}; - auto color_matrix = sws.program[0].uniform("ColorMatrix", members.data(), members.size()); - if (!color_matrix) { + int pipeline = configure_sws_pipeline(sws, color_p, std::move(tex), true); + if (pipeline < 0) { return std::nullopt; } - sws.color_matrix = std::move(*color_matrix); - - sws.tex = std::move(tex); - - sws.cursor_framebuffer = gl::frame_buf_t::make(1); - sws.cursor_framebuffer.bind(&sws.tex[0], &sws.tex[1]); - - sws.program[0].bind(sws.color_matrix); - sws.program[1].bind(sws.color_matrix); - - gl::ctx.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - - gl_drain_errors; - return sws; } @@ -1103,34 +1115,12 @@ namespace egl { } auto color_p = video::color_vectors_from_colorspace({video::colorspace_e::rec709, true, 8}, false); - std::array, 5> members {{ - std::make_pair("color_vec_y", util::view(color_p->color_vec_y)), - std::make_pair("color_vec_u", util::view(color_p->color_vec_u)), - std::make_pair("color_vec_v", util::view(color_p->color_vec_v)), - std::make_pair("range_y", util::view(color_p->range_y)), - std::make_pair("range_uv", util::view(color_p->range_uv)), - }}; - auto color_matrix = sws.program[0].uniform("ColorMatrix", members.data(), members.size()); - if (!color_matrix) { + int pipeline = configure_sws_pipeline(sws, color_p, std::move(tex), true); + if (pipeline < 0) { return std::nullopt; } - sws.color_matrix = std::move(*color_matrix); - - sws.tex = std::move(tex); - - sws.cursor_framebuffer = gl::frame_buf_t::make(1); - sws.cursor_framebuffer.bind(&sws.tex[0], &sws.tex[1]); - - sws.program[0].bind(sws.color_matrix); - sws.program[1].bind(sws.color_matrix); - sws.program[2].bind(sws.color_matrix); - - gl::ctx.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - - gl_drain_errors; - return sws; } @@ -1256,15 +1246,8 @@ namespace egl { } } - int sws_t::convert_nv12(gl::frame_buf_t &fb) { - gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); - - GLenum attachments[] { - GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1 - }; - - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { + int sws_t::draw_programs_to_buffers (GLenum attachments[], gl::frame_buf_t &fb, int count, bool is_yuv444) { + for (int x = 0; x < count; ++x) { gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, fb[x]); gl::ctx.DrawBuffers(1, &attachments[x]); @@ -1276,10 +1259,29 @@ namespace egl { } #endif + int sizeCoef = is_yuv444 ? 1 : x + 1; + gl::ctx.UseProgram(program[x].handle()); - gl::ctx.Viewport(offsetX / (x + 1), offsetY / (x + 1), out_width / (x + 1), out_height / (x + 1)); + gl::ctx.Viewport(offsetX/sizeCoef, offsetY/sizeCoef, out_width/sizeCoef, out_height/sizeCoef); gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); } + return 0; + } + + int sws_t::convert_nv12(gl::frame_buf_t &fb) { + gl::ctx.BindTexture(GL_TEXTURE_2D, loaded_texture); + + GLenum attachments[] { + GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1 + }; + + int attachmentsCount = sizeof(attachments) / sizeof(decltype(attachments[0])); + + int drawBuffers = draw_programs_to_buffers(attachments, fb, attachmentsCount, false); + if (drawBuffers < 0) { + return -1; + } gl::ctx.BindTexture(GL_TEXTURE_2D, 0); @@ -1297,21 +1299,11 @@ namespace egl { GL_COLOR_ATTACHMENT2 }; - for (int x = 0; x < sizeof(attachments) / sizeof(decltype(attachments[0])); ++x) { - gl::ctx.BindFramebuffer(GL_FRAMEBUFFER, fb[x]); - gl::ctx.DrawBuffers(1, &attachments[x]); + int attachmentsCount = sizeof(attachments) / sizeof(decltype(attachments[0])); - #ifndef NDEBUG - auto status = gl::ctx.CheckFramebufferStatus(GL_FRAMEBUFFER); - if (status != GL_FRAMEBUFFER_COMPLETE) { - BOOST_LOG(error) << "Pass "sv << x << ": CheckFramebufferStatus() --> [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - #endif - - gl::ctx.UseProgram(program[x].handle()); - gl::ctx.Viewport(offsetX, offsetY, out_width, out_height); - gl::ctx.DrawArrays(GL_TRIANGLES, 0, 3); + int drawBuffers = draw_programs_to_buffers(attachments, fb, attachmentsCount, true); + if (drawBuffers < 0) { + return -1; } gl::ctx.BindTexture(GL_TEXTURE_2D, 0); diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 7061430c227..4d63321a2e4 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -365,6 +365,9 @@ namespace egl { // Convert the loaded image into the first three framebuffers int convert_yuv444(gl::frame_buf_t &fb); + // Draw loaded image by programs to frame buffers + int draw_programs_to_buffers (GLenum attachments[], gl::frame_buf_t &fb, int count, bool is_yuv444); + // Make an area of the image black int blank(gl::frame_buf_t &fb, int offsetX_, int offsetY_, int width, int height, AVPixelFormat format);