[PATCH v6 08/18] libcamera: software_isp: Add DebayerCpu class
Laurent Pinchart
laurent.pinchart at ideasonboard.com
Wed Mar 27 19:01:32 CET 2024
Another comment.
On Wed, Mar 27, 2024 at 04:12:25PM +0200, Laurent Pinchart wrote:
> Hi Milan and Hans,
>
> Thank you for the patch.
>
> On Tue, Mar 19, 2024 at 01:35:55PM +0100, Milan Zamazal wrote:
> > From: Hans de Goede <hdegoede at redhat.com>
> >
> > Add CPU based debayering implementation. This initial implementation
> > only supports debayering packed 10 bits per pixel bayer data in
> > the 4 standard bayer orders.
> >
> > Doxygen documentation by Dennis Bonke.
> >
> > Tested-by: Bryan O'Donoghue <bryan.odonoghue at linaro.org> # sc8280xp Lenovo x13s
> > Tested-by: Pavel Machek <pavel at ucw.cz>
> > Reviewed-by: Pavel Machek <pavel at ucw.cz>
> > Co-developed-by: Dennis Bonke <admin at dennisbonke.com>
> > Signed-off-by: Dennis Bonke <admin at dennisbonke.com>
> > Co-developed-by: Andrey Konovalov <andrey.konovalov at linaro.org>
> > Signed-off-by: Andrey Konovalov <andrey.konovalov at linaro.org>
> > Co-developed-by: Pavel Machek <pavel at ucw.cz>
> > Signed-off-by: Pavel Machek <pavel at ucw.cz>
> > Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> > Reviewed-by: Milan Zamazal <mzamazal at redhat.com>
> > ---
> > src/libcamera/software_isp/debayer_cpu.cpp | 626 +++++++++++++++++++++
> > src/libcamera/software_isp/debayer_cpu.h | 143 +++++
> > src/libcamera/software_isp/meson.build | 1 +
> > 3 files changed, 770 insertions(+)
> > create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp
> > create mode 100644 src/libcamera/software_isp/debayer_cpu.h
> >
> > diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> > new file mode 100644
> > index 00000000..f932362c
> > --- /dev/null
> > +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> > @@ -0,0 +1,626 @@
> > +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> > +/*
> > + * Copyright (C) 2023, Linaro Ltd
> > + * Copyright (C) 2023, Red Hat Inc.
> > + *
> > + * Authors:
> > + * Hans de Goede <hdegoede at redhat.com>
> > + *
> > + * debayer_cpu.cpp - CPU based debayering class
> > + */
> > +
> > +#include "debayer_cpu.h"
> > +
> > +#include <math.h>
> > +#include <stdlib.h>
> > +#include <time.h>
> > +
> > +#include <libcamera/formats.h>
> > +
> > +#include "libcamera/internal/bayer_format.h"
> > +#include "libcamera/internal/framebuffer.h"
> > +#include "libcamera/internal/mapped_framebuffer.h"
> > +
> > +namespace libcamera {
> > +
> > +/**
> > + * \class DebayerCpu
> > + * \brief Class for debayering on the CPU
> > + *
> > + * Implementation for CPU based debayering
> > + */
> > +
> > +/**
> > + * \brief Constructs a DebayerCpu object.
> > + * \param[in] stats Pointer to the stats object to use.
>
> No period at the end of \brief or \param.
>
> > + */
> > +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)
> > + : stats_(std::move(stats)), gamma_correction_(1.0)
>
> gammaCorrection_
>
> > +{
> > +#ifdef __x86_64__
> > + enableInputMemcpy_ = false;
> > +#else
> > + enableInputMemcpy_ = true;
> > +#endif
>
> This is very much *not* nice :-( It needs to at least be explained
> clearly somewhere.
>
> > + /* Initialize gamma to 1.0 curve */
> > + for (unsigned int i = 0; i < kGammaLookupSize; i++)
> > + gamma_[i] = i / (kGammaLookupSize / kRGBLookupSize);
> > +
> > + for (unsigned int i = 0; i < kMaxLineBuffers; i++)
> > + lineBuffers_[i] = nullptr;
> > +}
> > +
> > +DebayerCpu::~DebayerCpu()
> > +{
> > + for (unsigned int i = 0; i < kMaxLineBuffers; i++)
> > + free(lineBuffers_[i]);
> > +}
> > +
> > +// RGR
> > +// GBG
> > +// RGR
>
> C-style comments.
>
> > +#define BGGR_BGR888(p, n, div) \
> > + *dst++ = blue_[curr[x] / (div)]; \
> > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
> > + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> > + x++;
> > +
> > +// GBG
> > +// RGR
> > +// GBG
> > +#define GRBG_BGR888(p, n, div) \
> > + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \
> > + *dst++ = green_[curr[x] / (div)]; \
> > + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> > + x++;
> > +
> > +// GRG
> > +// BGB
> > +// GRG
> > +#define GBRG_BGR888(p, n, div) \
> > + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> > + *dst++ = green_[curr[x] / (div)]; \
> > + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \
> > + x++;
> > +
> > +// BGB
> > +// GRG
> > +// BGB
> > +#define RGGB_BGR888(p, n, div) \
> > + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> > + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
> > + *dst++ = red_[curr[x] / (div)]; \
> > + x++;
> > +
> > +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> > +{
> > + const int width_in_bytes = window_.width * 5 / 4;
>
> snakeCase here too, ane where applicable everywhere else.
>
> > + const uint8_t *prev = (const uint8_t *)src[0];
>
> Is the cast needed ? If so it should be a C++ cast.
>
> > + const uint8_t *curr = (const uint8_t *)src[1];
> > + const uint8_t *next = (const uint8_t *)src[2];
> > +
> > + /*
> > + * For the first pixel getting a pixel from the previous column uses
> > + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
> > + * Same for last pixel (uses x + 2) and looking at the next column.
> > + */
> > + for (int x = 0; x < width_in_bytes;) {
> > + /* First pixel */
> > + BGGR_BGR888(2, 1, 1)
> > + /* Second pixel BGGR -> GBRG */
> > + GBRG_BGR888(1, 1, 1)
> > + /* Same thing for third and fourth pixels */
> > + BGGR_BGR888(1, 1, 1)
> > + GBRG_BGR888(1, 2, 1)
> > + /* Skip 5th src byte with 4 x 2 least-significant-bits */
> > + x++;
> > + }
> > +}
> > +
> > +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> > +{
> > + const int width_in_bytes = window_.width * 5 / 4;
> > + const uint8_t *prev = (const uint8_t *)src[0];
> > + const uint8_t *curr = (const uint8_t *)src[1];
> > + const uint8_t *next = (const uint8_t *)src[2];
> > +
> > + for (int x = 0; x < width_in_bytes;) {
> > + /* First pixel */
> > + GRBG_BGR888(2, 1, 1)
> > + /* Second pixel GRBG -> RGGB */
> > + RGGB_BGR888(1, 1, 1)
> > + /* Same thing for third and fourth pixels */
> > + GRBG_BGR888(1, 1, 1)
> > + RGGB_BGR888(1, 2, 1)
> > + /* Skip 5th src byte with 4 x 2 least-significant-bits */
> > + x++;
> > + }
> > +}
> > +
> > +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
> > +{
> > + const int width_in_bytes = window_.width * 5 / 4;
> > + const uint8_t *prev = (const uint8_t *)src[0];
> > + const uint8_t *curr = (const uint8_t *)src[1];
> > + const uint8_t *next = (const uint8_t *)src[2];
> > +
> > + for (int x = 0; x < width_in_bytes;) {
> > + /* Even pixel */
> > + GBRG_BGR888(2, 1, 1)
> > + /* Odd pixel GBGR -> BGGR */
> > + BGGR_BGR888(1, 1, 1)
> > + /* Same thing for next 2 pixels */
> > + GBRG_BGR888(1, 1, 1)
> > + BGGR_BGR888(1, 2, 1)
> > + /* Skip 5th src byte with 4 x 2 least-significant-bits */
> > + x++;
> > + }
> > +}
> > +
> > +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
> > +{
> > + const int width_in_bytes = window_.width * 5 / 4;
> > + const uint8_t *prev = (const uint8_t *)src[0];
> > + const uint8_t *curr = (const uint8_t *)src[1];
> > + const uint8_t *next = (const uint8_t *)src[2];
> > +
> > + for (int x = 0; x < width_in_bytes;) {
> > + /* Even pixel */
> > + RGGB_BGR888(2, 1, 1)
> > + /* Odd pixel RGGB -> GRBG */
> > + GRBG_BGR888(1, 1, 1)
> > + /* Same thing for next 2 pixels */
> > + RGGB_BGR888(1, 1, 1)
> > + GRBG_BGR888(1, 2, 1)
> > + /* Skip 5th src byte with 4 x 2 least-significant-bits */
> > + x++;
> > + }
> > +}
> > +
> > +static bool isStandardBayerOrder(BayerFormat::Order order)
> > +{
> > + return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||
> > + order == BayerFormat::GRBG || order == BayerFormat::RGGB;
> > +}
> > +
> > +/*
> > + * Setup the Debayer object according to the passed in parameters.
> > + * Return 0 on success, a negative errno value on failure
> > + * (unsupported parameters).
> > + */
> > +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
> > +{
> > + BayerFormat bayerFormat =
> > + BayerFormat::fromPixelFormat(inputFormat);
> > +
> > + if (bayerFormat.bitDepth == 10 &&
> > + bayerFormat.packing == BayerFormat::Packing::CSI2 &&
> > + isStandardBayerOrder(bayerFormat.order)) {
> > + config.bpp = 10;
> > + config.patternSize.width = 4; /* 5 bytes per *4* pixels */
> > + config.patternSize.height = 2;
> > + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });
> > + return 0;
> > + }
> > +
> > + LOG(Debayer, Info)
>
> Shouldn't this be an Error mesage ? Same below.
>
> > + << "Unsupported input format " << inputFormat.toString();
> > + return -EINVAL;
> > +}
> > +
> > +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
> > +{
> > + if (outputFormat == formats::RGB888) {
> > + config.bpp = 24;
> > + return 0;
> > + }
> > +
> > + LOG(Debayer, Info)
> > + << "Unsupported output format " << outputFormat.toString();
> > + return -EINVAL;
> > +}
> > +
> > +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */
/* \todo This ignores outputFormat since there is only 1 supported outputFormat for now */
> > +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)
> > +{
> > + BayerFormat bayerFormat =
> > + BayerFormat::fromPixelFormat(inputFormat);
> > +
> > + if (bayerFormat.bitDepth == 10 &&
> > + bayerFormat.packing == BayerFormat::Packing::CSI2) {
> > + switch (bayerFormat.order) {
> > + case BayerFormat::BGGR:
> > + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> > + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> > + return 0;
> > + case BayerFormat::GBRG:
> > + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> > + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> > + return 0;
> > + case BayerFormat::GRBG:
> > + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> > + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> > + return 0;
> > + case BayerFormat::RGGB:
> > + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> > + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> > + return 0;
> > + default:
> > + break;
> > + }
> > + }
> > +
> > + LOG(Debayer, Error) << "Unsupported input output format combination";
> > + return -EINVAL;
> > +}
> > +
> > +int DebayerCpu::configure(const StreamConfiguration &inputCfg,
> > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
> > +{
> > + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
> > + return -EINVAL;
> > +
> > + if (stats_->configure(inputCfg) != 0)
> > + return -EINVAL;
> > +
> > + const Size &stats_pattern_size = stats_->patternSize();
> > + if (inputConfig_.patternSize.width != stats_pattern_size.width ||
> > + inputConfig_.patternSize.height != stats_pattern_size.height) {
> > + LOG(Debayer, Error)
> > + << "mismatching stats and debayer pattern sizes for "
> > + << inputCfg.pixelFormat.toString();
> > + return -EINVAL;
> > + }
> > +
> > + inputConfig_.stride = inputCfg.stride;
> > +
> > + if (outputCfgs.size() != 1) {
> > + LOG(Debayer, Error)
> > + << "Unsupported number of output streams: "
> > + << outputCfgs.size();
> > + return -EINVAL;
> > + }
> > +
> > + const StreamConfiguration &outputCfg = outputCfgs[0];
> > + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
> > + std::tie(outputConfig_.stride, outputConfig_.frameSize) =
> > + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
> > +
> > + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
> > + LOG(Debayer, Error)
> > + << "Invalid output size/stride: "
> > + << "\n " << outputCfg.size << " (" << outSizeRange << ")"
> > + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")";
> > + return -EINVAL;
> > + }
> > +
> > + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)
> > + return -EINVAL;
> > +
> > + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
> > + ~(inputConfig_.patternSize.width - 1);
> > + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
> > + ~(inputConfig_.patternSize.height - 1);
> > + window_.width = outputCfg.size.width;
> > + window_.height = outputCfg.size.height;
> > +
> > + /* Don't pass x,y since process() already adjusts src before passing it */
> > + stats_->setWindow(Rectangle(window_.size()));
> > +
> > + /* pad with patternSize.Width on both left and right side */
> > + lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
> > + lineBufferLength_ = window_.width * inputConfig_.bpp / 8 +
> > + 2 * lineBufferPadding_;
> > + for (unsigned int i = 0;
> > + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;
> > + i++) {
> > + free(lineBuffers_[i]);
> > + lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);
> > + if (!lineBuffers_[i])
> > + return -ENOMEM;
> > + }
> > +
> > + measuredFrames_ = 0;
> > + frameProcessTime_ = 0;
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * Get width and height at which the bayer-pattern repeats.
> > + * Return pattern-size or an empty Size for an unsupported inputFormat.
> > + */
> > +Size DebayerCpu::patternSize(PixelFormat inputFormat)
> > +{
> > + DebayerCpu::DebayerInputConfig config;
> > +
> > + if (getInputConfig(inputFormat, config) != 0)
> > + return {};
> > +
> > + return config.patternSize;
> > +}
> > +
> > +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)
> > +{
> > + DebayerCpu::DebayerInputConfig config;
> > +
> > + if (getInputConfig(inputFormat, config) != 0)
> > + return std::vector<PixelFormat>();
> > +
> > + return config.outputFormats;
> > +}
> > +
> > +std::tuple<unsigned int, unsigned int>
> > +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
> > +{
> > + DebayerCpu::DebayerOutputConfig config;
> > +
> > + if (getOutputConfig(outputFormat, config) != 0)
> > + return std::make_tuple(0, 0);
> > +
> > + /* round up to multiple of 8 for 64 bits alignment */
> > + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
> > +
> > + return std::make_tuple(stride, stride * size.height);
> > +}
> > +
> > +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])
> > +{
> > + const unsigned int patternHeight = inputConfig_.patternSize.height;
> > +
> > + if (!enableInputMemcpy_)
> > + return;
> > +
> > + for (unsigned int i = 0; i < patternHeight; i++) {
> > + memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_,
> > + lineBufferLength_);
> > + linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;
> > + }
> > +
> > + /* Point lineBufferIndex_ to first unused lineBuffer */
> > + lineBufferIndex_ = patternHeight;
> > +}
> > +
> > +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
> > +{
> > + const unsigned int patternHeight = inputConfig_.patternSize.height;
> > +
> > + for (unsigned int i = 0; i < patternHeight; i++)
> > + linePointers[i] = linePointers[i + 1];
> > +
> > + linePointers[patternHeight] = src +
> > + (patternHeight / 2) * (int)inputConfig_.stride;
> > +}
> > +
> > +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])
> > +{
> > + const unsigned int patternHeight = inputConfig_.patternSize.height;
> > +
> > + if (!enableInputMemcpy_)
> > + return;
> > +
> > + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_,
> > + lineBufferLength_);
> > + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_;
> > +
> > + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
> > +}
> > +
> > +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)
> > +{
> > + unsigned int y_end = window_.y + window_.height;
> > + /* Holds [0] previous- [1] current- [2] next-line */
> > + const uint8_t *linePointers[3];
> > +
> > + /* Adjust src to top left corner of the window */
> > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
> > +
> > + /* [x] becomes [x - 1] after initial shiftLinePointers() call */
> > + if (window_.y) {
> > + linePointers[1] = src - inputConfig_.stride; /* previous-line */
> > + linePointers[2] = src;
> > + } else {
> > + /* window_.y == 0, use the next line as prev line */
> > + linePointers[1] = src + inputConfig_.stride;
> > + linePointers[2] = src;
> > + /* Last 2 lines also need special handling */
> > + y_end -= 2;
> > + }
> > +
> > + setupInputMemcpy(linePointers);
> > +
> > + for (unsigned int y = window_.y; y < y_end; y += 2) {
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + stats_->processLine0(y, linePointers);
> > + (this->*debayer0_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > +
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + (this->*debayer1_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > + }
> > +
> > + if (window_.y == 0) {
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + stats_->processLine0(y_end, linePointers);
> > + (this->*debayer0_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > +
> > + shiftLinePointers(linePointers, src);
> > + /* next line may point outside of src, use prev. */
> > + linePointers[2] = linePointers[0];
> > + (this->*debayer1_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > + }
> > +}
> > +
> > +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
> > +{
> > + const unsigned int y_end = window_.y + window_.height;
> > + /*
> > + * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line
> > + * [3] 1-line-down [4] 2-lines-down.
> > + */
> > + const uint8_t *linePointers[5];
> > +
> > + /* Adjust src to top left corner of the window */
> > + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
> > +
> > + /* [x] becomes [x - 1] after initial shiftLinePointers() call */
> > + linePointers[1] = src - 2 * inputConfig_.stride;
> > + linePointers[2] = src - inputConfig_.stride;
> > + linePointers[3] = src;
> > + linePointers[4] = src + inputConfig_.stride;
> > +
> > + setupInputMemcpy(linePointers);
> > +
> > + for (unsigned int y = window_.y; y < y_end; y += 4) {
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + stats_->processLine0(y, linePointers);
> > + (this->*debayer0_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > +
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + (this->*debayer1_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > +
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + stats_->processLine2(y, linePointers);
> > + (this->*debayer2_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > +
> > + shiftLinePointers(linePointers, src);
> > + memcpyNextLine(linePointers);
> > + (this->*debayer3_)(dst, linePointers);
> > + src += inputConfig_.stride;
> > + dst += outputConfig_.stride;
> > + }
> > +}
> > +
> > +static inline int64_t timeDiff(timespec &after, timespec &before)
> > +{
> > + return (after.tv_sec - before.tv_sec) * 1000000000LL +
> > + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
> > +}
> > +
> > +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> > +{
> > + timespec frameStartTime;
> > +
> > + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {
> > + frameStartTime = {};
> > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
> > + }
> > +
> > + /* Apply DebayerParams */
> > + if (params.gamma != gamma_correction_) {
> > + for (unsigned int i = 0; i < kGammaLookupSize; i++)
> > + gamma_[i] = UINT8_MAX * powf(i / (kGammaLookupSize - 1.0), params.gamma);
> > +
> > + gamma_correction_ = params.gamma;
> > + }
> > +
> > + for (unsigned int i = 0; i < kRGBLookupSize; i++) {
> > + constexpr unsigned int div =
> > + kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;
> > + unsigned int idx;
> > +
> > + /* Apply gamma after gain! */
> > + idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) });
> > + red_[i] = gamma_[idx];
> > +
> > + idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) });
> > + green_[i] = gamma_[idx];
> > +
> > + idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) });
> > + blue_[i] = gamma_[idx];
> > + }
> > +
> > + /* Copy metadata from the input buffer */
> > + FrameMetadata &metadata = output->_d()->metadata();
> > + metadata.status = input->metadata().status;
> > + metadata.sequence = input->metadata().sequence;
> > + metadata.timestamp = input->metadata().timestamp;
> > +
> > + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
> > + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
> > + if (!in.isValid() || !out.isValid()) {
> > + LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
> > + metadata.status = FrameMetadata::FrameError;
> > + return;
> > + }
> > +
> > + stats_->startFrame();
> > +
> > + if (inputConfig_.patternSize.height == 2)
> > + process2(in.planes()[0].data(), out.planes()[0].data());
> > + else
> > + process4(in.planes()[0].data(), out.planes()[0].data());
> > +
> > + metadata.planes()[0].bytesused = out.planes()[0].size();
> > +
> > + /* Measure before emitting signals */
> > + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
> > + ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
> > + timespec frameEndTime = {};
> > + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);
> > + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);
> > + if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {
> > + const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -
> > + DebayerCpu::kFramesToSkip;
> > + LOG(Debayer, Info)
> > + << "Processed " << measuredFrames
> > + << " frames in " << frameProcessTime_ / 1000 << "us, "
> > + << frameProcessTime_ / (1000 * measuredFrames)
> > + << " us/frame";
> > + }
> > + }
>
> Is this debugging leftovers, or should it be cleaned up ? It seems quite
> a bit of a hack. If we want to enable measurements, we shouldn't
> hardcode them to frames 30 to 60.
>
> > +
> > + stats_->finishFrame();
> > + outputBufferReady.emit(output);
> > + inputBufferReady.emit(input);
> > +}
> > +
> > +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
> > +{
> > + Size pattern_size = patternSize(inputFormat);
> > + unsigned int border_height = pattern_size.height;
> > +
> > + if (pattern_size.isNull())
> > + return {};
> > +
> > + /* No need for top/bottom border with a pattern height of 2 */
> > + if (pattern_size.height == 2)
> > + border_height = 0;
> > +
> > + /*
> > + * For debayer interpolation a border is kept around the entire image
> > + * and the minimum output size is pattern-height x pattern-width.
> > + */
> > + if (inputSize.width < (3 * pattern_size.width) ||
> > + inputSize.height < (2 * border_height + pattern_size.height)) {
> > + LOG(Debayer, Warning)
> > + << "Input format size too small: " << inputSize.toString();
> > + return {};
> > + }
> > +
> > + return SizeRange(Size(pattern_size.width, pattern_size.height),
> > + Size((inputSize.width - 2 * pattern_size.width) & ~(pattern_size.width - 1),
> > + (inputSize.height - 2 * border_height) & ~(pattern_size.height - 1)),
> > + pattern_size.width, pattern_size.height);
> > +}
> > +
> > +} /* namespace libcamera */
> > diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
> > new file mode 100644
> > index 00000000..8a51ed85
> > --- /dev/null
> > +++ b/src/libcamera/software_isp/debayer_cpu.h
> > @@ -0,0 +1,143 @@
> > +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> > +/*
> > + * Copyright (C) 2023, Linaro Ltd
> > + * Copyright (C) 2023, Red Hat Inc.
> > + *
> > + * Authors:
> > + * Hans de Goede <hdegoede at redhat.com>
> > + *
> > + * debayer_cpu.h - CPU based debayering header
> > + */
> > +
> > +#pragma once
> > +
> > +#include <memory>
> > +#include <stdint.h>
> > +#include <vector>
> > +
> > +#include <libcamera/base/object.h>
> > +
> > +#include "debayer.h"
> > +#include "swstats_cpu.h"
> > +
> > +namespace libcamera {
> > +
> > +class DebayerCpu : public Debayer, public Object
> > +{
> > +public:
> > + DebayerCpu(std::unique_ptr<SwStatsCpu> stats);
> > + ~DebayerCpu();
> > +
> > + int configure(const StreamConfiguration &inputCfg,
> > + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);
> > + Size patternSize(PixelFormat inputFormat);
> > + std::vector<PixelFormat> formats(PixelFormat input);
> > + std::tuple<unsigned int, unsigned int>
> > + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
> > + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);
> > + SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
> > +
> > + /**
> > + * \brief Get the file descriptor for the statistics.
> > + *
> > + * \return the file descriptor pointing to the statistics.
> > + */
> > + const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
>
> This, plus the fact that this class handles colour gains and gamma,
> makes me thing we have either a naming issue, or an architecture issue.
>
> > +
> > + /**
> > + * \brief Get the output frame size.
> > + *
> > + * \return The output frame size.
> > + */
> > + unsigned int frameSize() { return outputConfig_.frameSize; }
> > +
> > +private:
> > + /**
> > + * \brief Called to debayer 1 line of Bayer input data to output format
> > + * \param[out] dst Pointer to the start of the output line to write
> > + * \param[in] src The input data
> > + *
> > + * Input data is an array of (patternSize_.height + 1) src
> > + * pointers each pointing to a line in the Bayer source. The middle
> > + * element of the array will point to the actual line being processed.
> > + * Earlier element(s) will point to the previous line(s) and later
> > + * element(s) to the next line(s).
> > + *
> > + * These functions take an array of src pointers, rather than
> > + * a single src pointer + a stride for the source, so that when the src
> > + * is slow uncached memory it can be copied to faster memory before
> > + * debayering. Debayering a standard 2x2 Bayer pattern requires access
> > + * to the previous and next src lines for interpolating the missing
> > + * colors. To allow copying the src lines only once 3 temporary buffers
> > + * each holding a single line are used, re-using the oldest buffer for
> > + * the next line and the pointers are swizzled so that:
> > + * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line.
> > + * This way the 3 pointers passed to the debayer functions form
> > + * a sliding window over the src avoiding the need to copy each
> > + * line more than once.
> > + *
> > + * Similarly for bayer patterns which repeat every 4 lines, 5 src
> > + * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up
> > + * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.
> > + */
> > + using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
> > +
> > + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> > + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> > + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> > + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
> > +
> > + struct DebayerInputConfig {
> > + Size patternSize;
> > + unsigned int bpp; /* Memory used per pixel, not precision */
> > + unsigned int stride;
> > + std::vector<PixelFormat> outputFormats;
> > + };
> > +
> > + struct DebayerOutputConfig {
> > + unsigned int bpp; /* Memory used per pixel, not precision */
> > + unsigned int stride;
> > + unsigned int frameSize;
> > + };
> > +
> > + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
> > + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
> > + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);
> > + void setupInputMemcpy(const uint8_t *linePointers[]);
> > + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
> > + void memcpyNextLine(const uint8_t *linePointers[]);
> > + void process2(const uint8_t *src, uint8_t *dst);
> > + void process4(const uint8_t *src, uint8_t *dst);
> > +
> > + static constexpr unsigned int kGammaLookupSize = 1024;
> > + static constexpr unsigned int kRGBLookupSize = 256;
> > + /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */
> > + static constexpr unsigned int kMaxLineBuffers = 5;
> > +
> > + std::array<uint8_t, kGammaLookupSize> gamma_;
> > + std::array<uint8_t, kRGBLookupSize> red_;
> > + std::array<uint8_t, kRGBLookupSize> green_;
> > + std::array<uint8_t, kRGBLookupSize> blue_;
> > + debayerFn debayer0_;
> > + debayerFn debayer1_;
> > + debayerFn debayer2_;
> > + debayerFn debayer3_;
> > + Rectangle window_;
> > + DebayerInputConfig inputConfig_;
> > + DebayerOutputConfig outputConfig_;
> > + std::unique_ptr<SwStatsCpu> stats_;
> > + uint8_t *lineBuffers_[kMaxLineBuffers];
> > + unsigned int lineBufferLength_;
> > + unsigned int lineBufferPadding_;
> > + unsigned int lineBufferIndex_;
> > + bool enableInputMemcpy_;
> > + float gamma_correction_;
> > + unsigned int measuredFrames_;
> > + int64_t frameProcessTime_;
> > + /* Skip 30 frames for things to stabilize then measure 30 frames */
> > + static constexpr unsigned int kFramesToSkip = 30;
> > + static constexpr unsigned int kLastFrameToMeasure = 60;
> > +};
> > +
> > +} /* namespace libcamera */
> > diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
> > index 62095f61..71b46539 100644
> > --- a/src/libcamera/software_isp/meson.build
> > +++ b/src/libcamera/software_isp/meson.build
> > @@ -9,5 +9,6 @@ endif
> >
> > libcamera_sources += files([
> > 'debayer.cpp',
> > + 'debayer_cpu.cpp',
> > 'swstats_cpu.cpp',
> > ])
--
Regards,
Laurent Pinchart
More information about the libcamera-devel
mailing list