[PATCH v2] libcamera: debayer_cpu: Add 32bits/aligned output formats
Robert Mader
robert.mader at collabora.com
Tue Jun 18 08:34:30 CEST 2024
On 18.06.24 01:10, Laurent Pinchart wrote:
> Hi Robert,
>
> Thank you for the patch.
>
> On Mon, Jun 17, 2024 at 07:41:19PM +0200, Robert Mader wrote:
>> In order to be more compatible with modern hardware and APIs. This
>> notably allows GL implementations to directly import the buffers more
>> often and seems to be required for Wayland.
>>
>> Further more, as we already enforce a 8 byte stride, these formats work
>> better for clients that don't support padding - such as libwebrtc at the
>> time of writing.
>>
>> Tested devices:
>> - Librem5
>> - PinePhone
>> - Thinkpad X13s
>>
>> Signed-off-by: Robert Mader <robert.mader at collabora.com>
>> Tested-by: Kieran Bingham <kieran.bingham at ideasonboard.com>
>> ---
>> src/libcamera/software_isp/debayer_cpu.cpp | 101 ++++++++++++++-------
>> src/libcamera/software_isp/debayer_cpu.h | 1 +
>> 2 files changed, 68 insertions(+), 34 deletions(-)
>>
>> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
>> index c038eed4..79bb4d87 100644
>> --- a/src/libcamera/software_isp/debayer_cpu.cpp
>> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
>> @@ -70,10 +70,11 @@ DebayerCpu::~DebayerCpu()
>> * GBG
>> * RGR
>> */
>> -#define BGGR_BGR888(p, n, div) \
>> +#define BGGR_BGR888(p, n, div, addAlphaBit) \
>> *dst++ = blue_[curr[x] / (div)]; \
>> *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
>> *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>> + if (addAlphaBit) *dst++ = 255; \
> A conditional in here will slow everything down very significantly, not
> just for the new 32-bit formats, but for the existing 24-bit formats. I
> don't expect Milan and Hans to be very happy about it. We need a way to
> get this conditional optimized out at compile time.
>
> Function templates could help avoiding the duplication of the macros
> while guaranteeing compile-time optimization.
Tried this in v3 now, turned out quite nicely IMO.
>
>> x++;
>>
>> /*
>> @@ -81,10 +82,11 @@ DebayerCpu::~DebayerCpu()
>> * RGR
>> * GBG
>> */
>> -#define GRBG_BGR888(p, n, div) \
>> +#define GRBG_BGR888(p, n, div, addAlphaBit) \
>> *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \
>> *dst++ = green_[curr[x] / (div)]; \
>> *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>> + if (addAlphaBit) *dst++ = 255; \
>> x++;
>>
>> /*
>> @@ -92,10 +94,11 @@ DebayerCpu::~DebayerCpu()
>> * BGB
>> * GRG
>> */
>> -#define GBRG_BGR888(p, n, div) \
>> +#define GBRG_BGR888(p, n, div, addAlphaBit) \
>> *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
>> *dst++ = green_[curr[x] / (div)]; \
>> *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \
>> + if (addAlphaBit) *dst++ = 255; \
>> x++;
>>
>> /*
>> @@ -103,10 +106,11 @@ DebayerCpu::~DebayerCpu()
>> * GRG
>> * BGB
>> */
>> -#define RGGB_BGR888(p, n, div) \
>> +#define RGGB_BGR888(p, n, div, addAlphaBit) \
>> *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
>> *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
>> *dst++ = red_[curr[x] / (div)]; \
>> + if (addAlphaBit) *dst++ = 255; \
>> x++;
>>
>> void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>> @@ -114,8 +118,8 @@ void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>> DECLARE_SRC_POINTERS(uint8_t)
>>
>> for (int x = 0; x < (int)window_.width;) {
>> - BGGR_BGR888(1, 1, 1)
>> - GBRG_BGR888(1, 1, 1)
>> + BGGR_BGR888(1, 1, 1, addAlphaBit_)
>> + GBRG_BGR888(1, 1, 1, addAlphaBit_)
>> }
>> }
>>
>> @@ -124,8 +128,8 @@ void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>> DECLARE_SRC_POINTERS(uint8_t)
>>
>> for (int x = 0; x < (int)window_.width;) {
>> - GRBG_BGR888(1, 1, 1)
>> - RGGB_BGR888(1, 1, 1)
>> + GRBG_BGR888(1, 1, 1, addAlphaBit_)
>> + RGGB_BGR888(1, 1, 1, addAlphaBit_)
>> }
>> }
>>
>> @@ -135,8 +139,8 @@ void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < (int)window_.width;) {
>> /* divide values by 4 for 10 -> 8 bpp value */
>> - BGGR_BGR888(1, 1, 4)
>> - GBRG_BGR888(1, 1, 4)
>> + BGGR_BGR888(1, 1, 4, addAlphaBit_)
>> + GBRG_BGR888(1, 1, 4, addAlphaBit_)
>> }
>> }
>>
>> @@ -146,8 +150,8 @@ void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < (int)window_.width;) {
>> /* divide values by 4 for 10 -> 8 bpp value */
>> - GRBG_BGR888(1, 1, 4)
>> - RGGB_BGR888(1, 1, 4)
>> + GRBG_BGR888(1, 1, 4, addAlphaBit_)
>> + RGGB_BGR888(1, 1, 4, addAlphaBit_)
>> }
>> }
>>
>> @@ -157,8 +161,8 @@ void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < (int)window_.width;) {
>> /* divide values by 16 for 12 -> 8 bpp value */
>> - BGGR_BGR888(1, 1, 16)
>> - GBRG_BGR888(1, 1, 16)
>> + BGGR_BGR888(1, 1, 16, addAlphaBit_)
>> + GBRG_BGR888(1, 1, 16, addAlphaBit_)
>> }
>> }
>>
>> @@ -168,8 +172,8 @@ void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < (int)window_.width;) {
>> /* divide values by 16 for 12 -> 8 bpp value */
>> - GRBG_BGR888(1, 1, 16)
>> - RGGB_BGR888(1, 1, 16)
>> + GRBG_BGR888(1, 1, 16, addAlphaBit_)
>> + RGGB_BGR888(1, 1, 16, addAlphaBit_)
>> }
>> }
>>
>> @@ -187,12 +191,12 @@ void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
>> */
>> for (int x = 0; x < widthInBytes;) {
>> /* First pixel */
>> - BGGR_BGR888(2, 1, 1)
>> + BGGR_BGR888(2, 1, 1, addAlphaBit_)
>> /* Second pixel BGGR -> GBRG */
>> - GBRG_BGR888(1, 1, 1)
>> + GBRG_BGR888(1, 1, 1, addAlphaBit_)
>> /* Same thing for third and fourth pixels */
>> - BGGR_BGR888(1, 1, 1)
>> - GBRG_BGR888(1, 2, 1)
>> + BGGR_BGR888(1, 1, 1, addAlphaBit_)
>> + GBRG_BGR888(1, 2, 1, addAlphaBit_)
>> /* Skip 5th src byte with 4 x 2 least-significant-bits */
>> x++;
>> }
>> @@ -207,12 +211,12 @@ void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < widthInBytes;) {
>> /* First pixel */
>> - GRBG_BGR888(2, 1, 1)
>> + GRBG_BGR888(2, 1, 1, addAlphaBit_)
>> /* Second pixel GRBG -> RGGB */
>> - RGGB_BGR888(1, 1, 1)
>> + RGGB_BGR888(1, 1, 1, addAlphaBit_)
>> /* Same thing for third and fourth pixels */
>> - GRBG_BGR888(1, 1, 1)
>> - RGGB_BGR888(1, 2, 1)
>> + GRBG_BGR888(1, 1, 1, addAlphaBit_)
>> + RGGB_BGR888(1, 2, 1, addAlphaBit_)
>> /* Skip 5th src byte with 4 x 2 least-significant-bits */
>> x++;
>> }
>> @@ -227,12 +231,12 @@ void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < widthInBytes;) {
>> /* Even pixel */
>> - GBRG_BGR888(2, 1, 1)
>> + GBRG_BGR888(2, 1, 1, addAlphaBit_)
>> /* Odd pixel GBGR -> BGGR */
>> - BGGR_BGR888(1, 1, 1)
>> + BGGR_BGR888(1, 1, 1, addAlphaBit_)
>> /* Same thing for next 2 pixels */
>> - GBRG_BGR888(1, 1, 1)
>> - BGGR_BGR888(1, 2, 1)
>> + GBRG_BGR888(1, 1, 1, addAlphaBit_)
>> + BGGR_BGR888(1, 2, 1, addAlphaBit_)
>> /* Skip 5th src byte with 4 x 2 least-significant-bits */
>> x++;
>> }
>> @@ -247,12 +251,12 @@ void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
>>
>> for (int x = 0; x < widthInBytes;) {
>> /* Even pixel */
>> - RGGB_BGR888(2, 1, 1)
>> + RGGB_BGR888(2, 1, 1, addAlphaBit_)
>> /* Odd pixel RGGB -> GRBG */
>> - GRBG_BGR888(1, 1, 1)
>> + GRBG_BGR888(1, 1, 1, addAlphaBit_)
>> /* Same thing for next 2 pixels */
>> - RGGB_BGR888(1, 1, 1)
>> - GRBG_BGR888(1, 2, 1)
>> + RGGB_BGR888(1, 1, 1, addAlphaBit_)
>> + GRBG_BGR888(1, 2, 1, addAlphaBit_)
>> /* Skip 5th src byte with 4 x 2 least-significant-bits */
>> x++;
>> }
>> @@ -280,7 +284,14 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>> config.bpp = (bayerFormat.bitDepth + 7) & ~7;
>> config.patternSize.width = 2;
>> config.patternSize.height = 2;
>> - config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
>> + config.outputFormats = std::vector<PixelFormat>({
>> + formats::RGB888,
>> + formats::XRGB8888,
>> + formats::ARGB8888,
>> + formats::BGR888,
>> + formats::XBGR8888,
>> + formats::ABGR8888
>> + });
>> return 0;
>> }
>>
>> @@ -290,7 +301,14 @@ int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &conf
>> config.bpp = 10;
>> config.patternSize.width = 4; /* 5 bytes per *4* pixels */
>> config.patternSize.height = 2;
>> - config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
>> + config.outputFormats = std::vector<PixelFormat>({
>> + formats::RGB888,
>> + formats::XRGB8888,
>> + formats::ARGB8888,
>> + formats::BGR888,
>> + formats::XBGR8888,
>> + formats::ABGR8888
>> + });
>> return 0;
>> }
>>
>> @@ -306,6 +324,12 @@ int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &c
>> return 0;
>> }
>>
>> + if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
>> + outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
>> + config.bpp = 32;
>> + return 0;
>> + }
>> +
>> LOG(Debayer, Info)
>> << "Unsupported output format " << outputFormat.toString();
>> return -EINVAL;
>> @@ -344,6 +368,7 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>>
>> xShift_ = 0;
>> swapRedBlueGains_ = false;
>> + addAlphaBit_ = false;
>>
>> auto invalidFmt = []() -> int {
>> LOG(Debayer, Error) << "Unsupported input output format combination";
>> @@ -351,8 +376,16 @@ int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputF
>> };
>>
>> switch (outputFormat) {
>> + case formats::XRGB8888:
>> + case formats::ARGB8888:
>> + addAlphaBit_ = true;
>> + [[fallthrough]];
>> case formats::RGB888:
>> break;
>> + case formats::XBGR8888:
>> + case formats::ABGR8888:
>> + addAlphaBit_ = true;
>> + [[fallthrough]];
>> case formats::BGR888:
>> /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
>> swapRedBlueGains_ = true;
>> diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
>> index be7dcdca..4f77600d 100644
>> --- a/src/libcamera/software_isp/debayer_cpu.h
>> +++ b/src/libcamera/software_isp/debayer_cpu.h
>> @@ -132,6 +132,7 @@ private:
>> debayerFn debayer1_;
>> debayerFn debayer2_;
>> debayerFn debayer3_;
>> + bool addAlphaBit_;
>> Rectangle window_;
>> DebayerInputConfig inputConfig_;
>> DebayerOutputConfig outputConfig_;
--
Robert Mader
Consultant Software Developer
Collabora Ltd.
Platinum Building, St John's Innovation Park, Cambridge CB4 0DS, UK
Registered in England & Wales, no. 5513718
More information about the libcamera-devel
mailing list