[PATCH v2 10/18] libcamera: software_isp: Add DebayerCpu class
Milan Zamazal
mzamazal at redhat.com
Tue Jan 23 22:23:27 CET 2024
Hans de Goede <hdegoede at redhat.com> writes:
> Add CPU based debayering implementation. This initial implementation
> only supports debayering packed 10 bits per pixel bayer data in
> the 4 standard bayer orders.
>
> Doxygen documentation by Dennis Bonke.
>
> Co-authored-by: Dennis Bonke <admin at dennisbonke.com>
> Signed-off-by: Dennis Bonke <admin at dennisbonke.com>
> Co-authored-by: Andrey Konovalov <andrey.konovalov at linaro.org>
> Signed-off-by: Andrey Konovalov <andrey.konovalov at linaro.org>
> Co-authored-by: Pavel Machek <pavel at ucw.cz>
> Signed-off-by: Pavel Machek <pavel at ucw.cz>
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> Tested-by: Bryan O'Donoghue <bryan.odonoghue at linaro.org> # sc8280xp Lenovo x13s
> Tested-by: Pavel Machek <pavel at ucw.cz>
> ---
> .../internal/software_isp/debayer_cpu.h | 131 +++++
> .../internal/software_isp/meson.build | 1 +
> src/libcamera/software_isp/debayer_cpu.cpp | 528 ++++++++++++++++++
> src/libcamera/software_isp/meson.build | 1 +
> 4 files changed, 661 insertions(+)
> create mode 100644 include/libcamera/internal/software_isp/debayer_cpu.h
> create mode 100644 src/libcamera/software_isp/debayer_cpu.cpp
>
> diff --git a/include/libcamera/internal/software_isp/debayer_cpu.h b/include/libcamera/internal/software_isp/debayer_cpu.h
> new file mode 100644
> index 00000000..78573f44
> --- /dev/null
> +++ b/include/libcamera/internal/software_isp/debayer_cpu.h
> @@ -0,0 +1,131 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2023, Linaro Ltd
> + * Copyright (C) 2023, Red Hat Inc.
> + *
> + * Authors:
> + * Hans de Goede <hdegoede at redhat.com>
> + *
> + * debayer_cpu.h - CPU based debayering header
> + */
> +
> +#pragma once
> +
> +#include <memory>
> +#include <stdint.h>
> +#include <vector>
> +
> +#include <libcamera/base/object.h>
> +
> +#include "libcamera/internal/software_isp/swstats_cpu.h"
> +#include "libcamera/internal/software_isp/debayer.h"
> +
> +namespace libcamera {
> +
> +/**
> + * \class DebayerCpu
> + * \brief Class for debayering on the CPU
> + *
> + * Implementation for CPU based debayering
> + */
> +class DebayerCpu : public Debayer, public Object
> +{
> +public:
> + /*
> + * FIXME this should be a plain (implementation independent) SwStats
> + * this can be fixed once getStats() is dropped.
> + */
> + /**
> + * \brief Constructs a DebayerCpu object.
> + * \param[in] stats Pointer to the stats object to use.
> + */
> + DebayerCpu(std::unique_ptr<SwStatsCpu> stats);
> + ~DebayerCpu();
> +
> + /*
> + * Setup the Debayer object according to the passed in parameters.
> + * Return 0 on success, a negative errno value on failure
> + * (unsupported parameters).
> + */
> + int configure(const StreamConfiguration &inputCfg,
> + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);
> +
> + /*
> + * Get width and height at which the bayer-pattern repeats.
> + * Return pattern-size or an empty Size for an unsupported inputFormat.
> + */
> + Size patternSize(PixelFormat inputFormat);
> +
> + std::vector<PixelFormat> formats(PixelFormat input);
> + std::tuple<unsigned int, unsigned int>
> + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
> +
> + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);
> +
> + /**
> + * \brief Get the file descriptor for the statistics.
> + *
> + * \return the file descriptor pointing to the statistics.
> + */
> + const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
> +
> + /**
> + * \brief Get the output frame size.
> + *
> + * \return The output frame size.
> + */
> + unsigned int frameSize() { return outputConfig_.frameSize; }
> +private:
> + void initLinePointers(const uint8_t *linePointers[], const uint8_t *src);
> + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
> + void process2(const uint8_t *src, uint8_t *dst);
> + void process4(const uint8_t *src, uint8_t *dst);
> + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
> + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
> + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
> + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
> + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
> +
> + typedef void (DebayerCpu::*debayerFn)(uint8_t *dst, const uint8_t *src[]);
> +
> + struct DebayerInputConfig {
> + Size patternSize;
> + unsigned int bpp; /* Memory used per pixel, not precision */
> + unsigned int stride;
> + std::vector<PixelFormat> outputFormats;
> + };
> +
> + struct DebayerOutputConfig {
> + unsigned int bpp; /* Memory used per pixel, not precision */
> + unsigned int stride;
> + unsigned int frameSize;
> + };
> +
> + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
> + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
> + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);
> +
> + uint8_t gamma_[1024];
> + uint8_t red_[256];
> + uint8_t green_[256];
> + uint8_t blue_[256];
Named constants for the array sizes?
> + debayerFn debayer0_;
> + debayerFn debayer1_;
> + debayerFn debayer2_;
> + debayerFn debayer3_;
> + Rectangle window_;
> + DebayerInputConfig inputConfig_;
> + DebayerOutputConfig outputConfig_;
> + std::unique_ptr<SwStatsCpu> stats_;
> + uint8_t *lineBuffers_[5];
A named constant for the array size?
Also, the array size assumes a limit on the pattern height, which is reasonable
but not documented.
> + unsigned int lineBufferIndex_;
> + bool enableInputMemcpy_;
> + float gamma_correction_;
> + int measuredFrames_;
> + int64_t frameProcessTime_;
> + /* Skip 30 frames for things to stabilize then measure 30 frames */
> + static const int framesToSkip = 30;
> + static const int framesToMeasure = 60;
Frames-to-measure is 30, a name like lastFrameToMeasure might be a bit clearer.
> +};
> +
> +} /* namespace libcamera */
> diff --git a/include/libcamera/internal/software_isp/meson.build b/include/libcamera/internal/software_isp/meson.build
> index 7e40925e..b5a0d737 100644
> --- a/include/libcamera/internal/software_isp/meson.build
> +++ b/include/libcamera/internal/software_isp/meson.build
> @@ -2,6 +2,7 @@
>
> libcamera_internal_headers += files([
> 'debayer.h',
> + 'debayer_cpu.h',
> 'debayer_params.h',
> 'swisp_stats.h',
> 'swstats.h',
> diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
> new file mode 100644
> index 00000000..e0c3c658
> --- /dev/null
> +++ b/src/libcamera/software_isp/debayer_cpu.cpp
> @@ -0,0 +1,528 @@
> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
> +/*
> + * Copyright (C) 2023, Linaro Ltd
> + * Copyright (C) 2023, Red Hat Inc.
> + *
> + * Authors:
> + * Hans de Goede <hdegoede at redhat.com>
> + *
> + * debayer_cpu.cpp - CPU based debayering class
> + */
> +
> +#include "libcamera/internal/software_isp/debayer_cpu.h"
> +
> +#include <math.h>
> +#include <stdlib.h>
> +#include <time.h>
> +
> +#include <libcamera/formats.h>
> +
> +#include "libcamera/internal/bayer_format.h"
> +#include "libcamera/internal/framebuffer.h"
> +#include "libcamera/internal/mapped_framebuffer.h"
> +
> +namespace libcamera {
> +
> +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)
> + : stats_(std::move(stats)), gamma_correction_(1.0)
> +{
> +#ifdef __x86_64__
> + enableInputMemcpy_ = false;
> +#else
> + enableInputMemcpy_ = true;
> +#endif
> + /* Initialize gamma to 1.0 curve */
> + for (int i = 0; i < 1024; i++)
> + gamma_[i] = i / 4;
> +
> + for (int i = 0; i < 5; i++)
> + lineBuffers_[i] = NULL;
A question for C/C++ experts whether NULL or nullptr should be used here, with a
special consideration of free() calls below.
> +}
> +
> +DebayerCpu::~DebayerCpu()
> +{
> + for (int i = 0; i < 5; i++)
> + free(lineBuffers_[i]);
> +}
> +
> +// RGR
> +// GBG
> +// RGR
> +#define BGGR_BGR888(p, n, div) \
> + *dst++ = blue_[curr[x] / (div)]; \
> + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
> + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> + x++;
> +
> +// GBG
> +// RGR
> +// GBG
> +#define GRBG_BGR888(p, n, div) \
> + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \
> + *dst++ = green_[curr[x] / (div)]; \
> + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> + x++;
> +
> +// GRG
> +// BGB
> +// GRG
> +#define GBRG_BGR888(p, n, div) \
> + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
> + *dst++ = green_[curr[x] / (div)]; \
> + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \
> + x++;
> +
> +// BGB
> +// GRG
> +// BGB
> +#define RGGB_BGR888(p, n, div) \
> + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
> + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
> + *dst++ = red_[curr[x] / (div)]; \
> + x++;
> +
> +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
> +{
> + const int width_in_bytes = window_.width * 5 / 4;
> + const uint8_t *prev = (const uint8_t *)src[0];
> + const uint8_t *curr = (const uint8_t *)src[1];
> + const uint8_t *next = (const uint8_t *)src[2];
> +
> + /*
> + * For the first pixel getting a pixel from the previous column uses
> + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
> + * Same for last pixel (uses x + 2) and looking at the next column.
> + * x++ in the for-loop skips the 5th byte with 4 x 2 lsb-s for 10bit packed.
> + */
> + for (int x = 0; x < width_in_bytes; x++) {
> + /* Even pixel */
> + BGGR_BGR888(2, 1, 1)
> + /* Odd pixel BGGR -> GBRG */
> + GBRG_BGR888(1, 1, 1)
> + /* Same thing for next 2 pixels */
> + BGGR_BGR888(1, 1, 1)
> + GBRG_BGR888(1, 2, 1)
"Even" and "odd" is a bit confusing here. How about "first", "second", "third", "fourth"?
> + }
> +}
> +
> +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
> +{
> + const int width_in_bytes = window_.width * 5 / 4;
> + const uint8_t *prev = (const uint8_t *)src[0];
> + const uint8_t *curr = (const uint8_t *)src[1];
> + const uint8_t *next = (const uint8_t *)src[2];
> +
> + for (int x = 0; x < width_in_bytes; x++) {
> + /* Even pixel */
> + GRBG_BGR888(2, 1, 1)
> + /* Odd pixel GRBG -> RGGB */
> + RGGB_BGR888(1, 1, 1)
> + /* Same thing for next 2 pixels */
> + GRBG_BGR888(1, 1, 1)
> + RGGB_BGR888(1, 2, 1)
> + }
> +}
> +
> +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
> +{
> + const int width_in_bytes = window_.width * 5 / 4;
> + const uint8_t *prev = (const uint8_t *)src[0];
> + const uint8_t *curr = (const uint8_t *)src[1];
> + const uint8_t *next = (const uint8_t *)src[2];
> +
> + for (int x = 0; x < width_in_bytes; x++) {
> + /* Even pixel */
> + GBRG_BGR888(2, 1, 1)
> + /* Odd pixel GBGR -> BGGR */
> + BGGR_BGR888(1, 1, 1)
> + /* Same thing for next 2 pixels */
> + GBRG_BGR888(1, 1, 1)
> + BGGR_BGR888(1, 2, 1)
> + }
> +}
> +
> +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
> +{
> + const int width_in_bytes = window_.width * 5 / 4;
> + const uint8_t *prev = (const uint8_t *)src[0];
> + const uint8_t *curr = (const uint8_t *)src[1];
> + const uint8_t *next = (const uint8_t *)src[2];
> +
> + for (int x = 0; x < width_in_bytes; x++) {
> + /* Even pixel */
> + RGGB_BGR888(2, 1, 1)
> + /* Odd pixel RGGB -> GRBG*/
> + GRBG_BGR888(1, 1, 1)
> + /* Same thing for next 2 pixels */
> + RGGB_BGR888(1, 1, 1)
> + GRBG_BGR888(1, 2, 1)
> + }
> +}
> +
> +static bool isStandardBayerOrder(BayerFormat::Order order)
> +{
> + return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||
> + order == BayerFormat::GRBG || order == BayerFormat::RGGB;
> +}
> +
> +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
> +{
> + BayerFormat bayerFormat =
> + BayerFormat::fromPixelFormat(inputFormat);
> +
> + if (bayerFormat.bitDepth == 10 &&
> + bayerFormat.packing == BayerFormat::Packing::CSI2 &&
> + isStandardBayerOrder(bayerFormat.order)) {
> + config.bpp = 10;
> + config.patternSize.width = 4; /* 5 bytes per *4* pixels */
> + config.patternSize.height = 2;
> + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888 });
> + return 0;
> + }
> +
> + LOG(Debayer, Info)
> + << "Unsupported input format " << inputFormat.toString();
> + return -EINVAL;
> +}
> +
> +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
> +{
> + if (outputFormat == formats::RGB888) {
> + config.bpp = 24;
> + return 0;
> + }
> +
> + LOG(Debayer, Info)
> + << "Unsupported output format " << outputFormat.toString();
> + return -EINVAL;
> +}
> +
> +/* TODO: this ignores outputFormat since there is only 1 supported outputFormat for now */
> +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, [[maybe_unused]] PixelFormat outputFormat)
> +{
> + BayerFormat bayerFormat =
> + BayerFormat::fromPixelFormat(inputFormat);
> +
> + if (bayerFormat.bitDepth == 10 &&
> + bayerFormat.packing == BayerFormat::Packing::CSI2) {
> + switch (bayerFormat.order) {
> + case BayerFormat::BGGR:
> + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> + return 0;
> + case BayerFormat::GBRG:
> + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> + return 0;
> + case BayerFormat::GRBG:
> + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
> + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
> + return 0;
> + case BayerFormat::RGGB:
> + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
> + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
> + return 0;
> + default:
> + break;
> + }
> + }
> +
> + LOG(Debayer, Error) << "Unsupported input output format combination";
> + return -EINVAL;
> +}
> +
> +int DebayerCpu::configure(const StreamConfiguration &inputCfg,
> + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
> +{
> + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
> + return -EINVAL;
> +
> + if (stats_->configure(inputCfg) != 0)
> + return -EINVAL;
> +
> + const Size &stats_pattern_size = stats_->patternSize();
> + if (inputConfig_.patternSize.width != stats_pattern_size.width ||
> + inputConfig_.patternSize.height != stats_pattern_size.height) {
> + LOG(Debayer, Error)
> + << "mismatching stats and debayer pattern sizes for "
> + << inputCfg.pixelFormat.toString();
> + return -EINVAL;
> + }
> +
> + inputConfig_.stride = inputCfg.stride;
> +
> + if (outputCfgs.size() != 1) {
> + LOG(Debayer, Error)
> + << "Unsupported number of output streams: "
> + << outputCfgs.size();
> + return -EINVAL;
> + }
> +
> + const StreamConfiguration &outputCfg = outputCfgs[0];
> + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
> + std::tie(outputConfig_.stride, outputConfig_.frameSize) =
> + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
> +
> + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
> + LOG(Debayer, Error)
> + << "Invalid output size/stride: "
> + << "\n " << outputCfg.size << " (" << outSizeRange << ")"
> + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")";
> + return -EINVAL;
> + }
> +
> + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)
> + return -EINVAL;
> +
> + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
> + ~(inputConfig_.patternSize.width - 1);
> + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
> + ~(inputConfig_.patternSize.height - 1);
*Implicit* assumption that width and height are powers of 2?
> + window_.width = outputCfg.size.width;
> + window_.height = outputCfg.size.height;
> +
> + /* Don't pass x,y since process() already adjusts src before passing it */
> + stats_->setWindow(Rectangle(window_.size()));
> +
> + for (unsigned int i = 0;
> + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;
> + i++) {
> + /* pad with patternSize.Width on both left and right side */
> + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
> + inputConfig_.bpp / 8;
> +
> + free(lineBuffers_[i]);
> + lineBuffers_[i] = (uint8_t *)malloc(lineLength);
> + if (!lineBuffers_[i])
> + return -ENOMEM;
What happens if malloc above fails and then free in the DebayerCpu destructor is
called?
> + }
> +
> + measuredFrames_ = 0;
> + frameProcessTime_ = 0;
> +
> + return 0;
> +}
> +
> +Size DebayerCpu::patternSize(PixelFormat inputFormat)
> +{
> + DebayerCpu::DebayerInputConfig config;
> +
> + if (getInputConfig(inputFormat, config) != 0)
> + return {};
> +
> + return config.patternSize;
> +}
> +
> +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)
> +{
> + DebayerCpu::DebayerInputConfig config;
> +
> + if (getInputConfig(inputFormat, config) != 0)
> + return std::vector<PixelFormat>();
> +
> + return config.outputFormats;
> +}
> +
> +std::tuple<unsigned int, unsigned int>
> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
> +{
> + DebayerCpu::DebayerOutputConfig config;
> +
> + if (getOutputConfig(outputFormat, config) != 0)
> + return std::make_tuple(0, 0);
> +
> + /* round up to multiple of 8 for 64 bits alignment */
> + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
> +
> + return std::make_tuple(stride, stride * size.height);
> +}
> +
> +void DebayerCpu::initLinePointers(const uint8_t *linePointers[], const uint8_t *src)
> +{
> + const int patternHeight = inputConfig_.patternSize.height;
> +
> + for (int i = 0; i < patternHeight; i++)
> + linePointers[i + 1] = src +
> + (-patternHeight / 2 + i) * (int)inputConfig_.stride;
> +
> + if (!enableInputMemcpy_)
> + return;
> +
> + for (int i = 0; i < patternHeight; i++) {
> + /* pad with patternSize.Width on both left and right side */
> + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
> + inputConfig_.bpp / 8;
> + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
The code/pattern duplication in lineLength and padding computation above and
below calls for inline helper functions.
> +
> + memcpy(lineBuffers_[i], linePointers[i + 1] - padding, lineLength);
> + linePointers[i + 1] = lineBuffers_[i] + padding;
> + }
> +
> + /* Point lineBufferIndex_ to first unused lineBuffer */
> + lineBufferIndex_ = patternHeight;
Tracking all the memory locations here and checking their correctness is
difficult. Not sure how to make it easier to understand, maybe more local
variables or comments?
> +}
> +
> +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
> +{
> + const int patternHeight = inputConfig_.patternSize.height;
> +
> + for (int i = 0; i < patternHeight; i++)
> + linePointers[i] = linePointers[i + 1];
> +
> + linePointers[patternHeight] = src +
> + (patternHeight / 2) * (int)inputConfig_.stride;
> +
> + if (!enableInputMemcpy_)
> + return;
> +
> + size_t lineLength = (window_.width + 2 * inputConfig_.patternSize.width) *
> + inputConfig_.bpp / 8;
> + int padding = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
> + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - padding, lineLength);
> + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + padding;
> +
> + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
> +}
> +
> +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)
> +{
> + const unsigned int y_end = window_.y + window_.height;
> + const uint8_t *linePointers[3];
> +
> + /* Adjust src to top left corner of the window */
> + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
> +
> + initLinePointers(linePointers, src);
> +
> + for (unsigned int y = window_.y; y < y_end; y += 2) {
> + shiftLinePointers(linePointers, src);
> + stats_->processLine0(y, linePointers);
> + (this->*debayer0_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> +
> + shiftLinePointers(linePointers, src);
> + (this->*debayer1_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> + }
> +}
> +
> +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
> +{
> + const unsigned int y_end = window_.y + window_.height;
> + const uint8_t *linePointers[5];
> +
> + /* Adjust src to top left corner of the window */
> + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
> +
> + initLinePointers(linePointers, src);
> +
> + for (unsigned int y = window_.y; y < y_end; y += 4) {
> + shiftLinePointers(linePointers, src);
> + stats_->processLine0(y, linePointers);
> + (this->*debayer0_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> +
> + shiftLinePointers(linePointers, src);
> + (this->*debayer1_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> +
> + shiftLinePointers(linePointers, src);
> + stats_->processLine2(y, linePointers);
> + (this->*debayer2_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> +
> + shiftLinePointers(linePointers, src);
> + (this->*debayer3_)(dst, linePointers);
> + src += inputConfig_.stride;
> + dst += outputConfig_.stride;
> + }
> +}
> +
> +static inline int64_t timeDiff(timespec &after, timespec &before)
> +{
> + return (after.tv_sec - before.tv_sec) * 1000000000LL +
> + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
> +}
> +
> +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
> +{
> + timespec frameStartTime;
> +
> + if (measuredFrames_ < DebayerCpu::framesToMeasure) {
> + frameStartTime = {};
> + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
> + }
> +
> + /* Apply DebayerParams */
> + if (params.gamma != gamma_correction_) {
> + for (int i = 0; i < 1024; i++)
> + gamma_[i] = 255 * powf(i / 1023.0, params.gamma);
> +
> + gamma_correction_ = params.gamma;
> + }
> +
> + for (int i = 0; i < 256; i++) {
> + int idx;
> +
> + /* Apply gamma after gain! */
> + idx = std::min({ i * params.gainR / 64U, 1023U });
> + red_[i] = gamma_[idx];
> +
> + idx = std::min({ i * params.gainG / 64U, 1023U });
> + green_[i] = gamma_[idx];
> +
> + idx = std::min({ i * params.gainB / 64U, 1023U });
> + blue_[i] = gamma_[idx];
> + }
> +
> + /* Copy metadata from the input buffer */
> + FrameMetadata &metadata = output->_d()->metadata();
> + metadata.status = input->metadata().status;
> + metadata.sequence = input->metadata().sequence;
> + metadata.timestamp = input->metadata().timestamp;
> +
> + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
> + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
> + if (!in.isValid() || !out.isValid()) {
> + LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
> + metadata.status = FrameMetadata::FrameError;
> + return;
> + }
> +
> + stats_->startFrame();
> +
> + if (inputConfig_.patternSize.height == 2)
> + process2(in.planes()[0].data(), out.planes()[0].data());
> + else
> + process4(in.planes()[0].data(), out.planes()[0].data());
> +
> + metadata.planes()[0].bytesused = out.planes()[0].size();
> +
> + /* Measure before emitting signals */
> + if (measuredFrames_ < DebayerCpu::framesToMeasure &&
> + ++measuredFrames_ > DebayerCpu::framesToSkip) {
> + timespec frameEndTime = {};
> + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);
> + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);
> + if (measuredFrames_ == DebayerCpu::framesToMeasure) {
> + const int measuredFrames = DebayerCpu::framesToMeasure -
> + DebayerCpu::framesToSkip;
> + LOG(Debayer, Info)
> + << "Processed " << measuredFrames
> + << " frames in " << frameProcessTime_ / 1000 << "us, "
> + << frameProcessTime_ / (1000 * measuredFrames)
> + << " us/frame";
> + }
> + }
> +
> + stats_->finishFrame();
> + outputBufferReady.emit(output);
> + inputBufferReady.emit(input);
The thread-bound calls problem here, Andrei already works on it.
> +}
> +
> +} /* namespace libcamera */
> diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
> index d4ae5ac7..6d7a44d7 100644
> --- a/src/libcamera/software_isp/meson.build
> +++ b/src/libcamera/software_isp/meson.build
> @@ -2,6 +2,7 @@
>
> libcamera_sources += files([
> 'debayer.cpp',
> + 'debayer_cpu.cpp',
> 'swstats.cpp',
> 'swstats_cpu.cpp',
> ])
More information about the libcamera-devel
mailing list