[libcamera-devel] [PATCH v4 3/4] ipa: ipu3: Add support for IPU3 AWB algorithm
Jean-Michel Hautbois
jeanmichel.hautbois at ideasonboard.com
Wed Mar 31 07:44:35 CEST 2021
Hi Kieran,
On 31/03/2021 00:48, Kieran Bingham wrote:
> Hi JM
>
> On 30/03/2021 22:12, Jean-Michel Hautbois wrote:
>> The IPA will locally modify the parameters before they are passed down
>> to the ImgU. Use a local parameter object to give a reference to those
>> algorithms.
>>
>> Inherit from the Algorithm class to implement basic AWB functions.
>>
>> Once AWB is done, a color temperature is estimated and a default CCM matrix
>> will be used (yet to be tuned).
>> Implement a basic "grey-world" AWB algorithm just for demonstration purpose.
>>
>> The BDS output size is passed by the pipeline handler to the IPA.
>> The best grid is then calculated to maximize the number of pixels taken
>> into account in each cell.
>>
>> As commented in the source code, it can be improved, as it has (at least)
>> one limitation: if a cell is big (say 128 pixels wide) and indicated as
>> saturated, it won't be taken into account at all.
>> Maybe it is possible to use a smaller cell, at the cost of losing a few
>> pixels, in which case we can center the grid using the x_start and
>> y_start parameters.
>>
>> Signed-off-by: Jean-Michel Hautbois <jeanmichel.hautbois at ideasonboard.com>
>> ---
>> src/ipa/ipu3/ipu3.cpp | 86 ++++++++++++--
>> src/ipa/ipu3/ipu3_awb.cpp | 235 ++++++++++++++++++++++++++++++++++++++
>> src/ipa/ipu3/ipu3_awb.h | 44 +++++++
>> src/ipa/ipu3/meson.build | 7 +-
>> 4 files changed, 363 insertions(+), 9 deletions(-)
>> create mode 100644 src/ipa/ipu3/ipu3_awb.cpp
>> create mode 100644 src/ipa/ipu3/ipu3_awb.h
>>
>> diff --git a/src/ipa/ipu3/ipu3.cpp b/src/ipa/ipu3/ipu3.cpp
>> index 34a907f2..1cce11c9 100644
>> --- a/src/ipa/ipu3/ipu3.cpp
>> +++ b/src/ipa/ipu3/ipu3.cpp
>> @@ -21,6 +21,11 @@
>> #include "libcamera/internal/buffer.h"
>> #include "libcamera/internal/log.h"
>>
>> +#include "ipu3_awb.h"
>> +
>> +static constexpr uint32_t kMaxCellWidthPerSet = 160;
>> +static constexpr uint32_t kMaxCellHeightPerSet = 80;
>> +
>> namespace libcamera {
>>
>> LOG_DEFINE_CATEGORY(IPAIPU3)
>> @@ -49,6 +54,7 @@ private:
>> const ipu3_uapi_stats_3a *stats);
>>
>> void setControls(unsigned int frame);
>> + void calculateBdsGrid(const Size &bdsOutputSize);
>>
>> std::map<unsigned int, MappedFrameBuffer> buffers_;
>>
>> @@ -61,6 +67,14 @@ private:
>> uint32_t gain_;
>> uint32_t minGain_;
>> uint32_t maxGain_;
>> +
>> + /* Interface to the AWB algorithm */
>> + std::unique_ptr<ipa::IPU3Awb> awbAlgo_;
>> +
>> + /* Local parameter storage */
>> + struct ipu3_uapi_params params_;
>> +
>> + struct ipu3_uapi_grid_config bdsGrid_;
>> };
>>
>> int IPAIPU3::start()
>> @@ -70,8 +84,59 @@ int IPAIPU3::start()
>> return 0;
>> }
>>
>> +/**
>> + * This method calculates a grid for the AWB algorithm in the IPU3 firmware.
>> + * Its input is the BDS output size calculated in the ImgU.
>> + * It is limited for now to the simplest method: find the lesser error
>> + * with the width/height and respective log2 width/height of the cells.
>> + *
>> + * \todo The frame is divided into cells which can be 8x8 => 128x128.
>> + * As a smaller cell improves the algorithm precision, adapting the
>> + * x_start and y_start parameters of the grid would provoke a loss of
>> + * some pixels but would also result in more accurate algorithms.
>> + */
>> +void IPAIPU3::calculateBdsGrid(const Size &bdsOutputSize)
>> +{
>> + uint32_t minError = std::numeric_limits<uint32_t>::max();
>> + uint32_t bestWidth = 0;
>> + uint32_t bestHeight = 0;
>> + uint32_t bestLog2Width = 0;
>> + uint32_t bestLog2Height = 0;
>> + bdsGrid_ = {};
>> +
>> + for (uint32_t widthShift = 3; widthShift <= 7; ++widthShift) {
>> + uint32_t width = std::min(kMaxCellWidthPerSet,
>> + bdsOutputSize.width >> widthShift);
>> + width = width << widthShift;
>> + for (uint32_t heightShift = 3; heightShift <= 7; ++heightShift) {
>> + int32_t height = std::min(kMaxCellHeightPerSet,
>> + bdsOutputSize.height >> heightShift);
>> + height = height << heightShift;
>> + uint32_t error = std::abs(static_cast<int>(width - bdsOutputSize.width)) + std::abs(static_cast<int>(height - bdsOutputSize.height));
>
> That's a long line ...
Checkstyle has corrected that one :-/ !
>> +
>> + if (error > minError)
>> + continue;
>> +
>> + minError = error;
>> + bestWidth = width;
>> + bestHeight = height;
>> + bestLog2Width = widthShift;
>> + bestLog2Height = heightShift;
>> + }
>> + }
>> +
>> + bdsGrid_.width = bestWidth >> bestLog2Width;
>> + bdsGrid_.block_width_log2 = bestLog2Width;
>> + bdsGrid_.height = bestHeight >> bestLog2Height;
>> + bdsGrid_.block_height_log2 = bestLog2Height;
>> +
>> + LOG(IPAIPU3, Debug) << "Best grid found is: ("
>> + << (int)bdsGrid_.width << " << " << (int)bdsGrid_.block_width_log2 << ") x ("
>> + << (int)bdsGrid_.height << "<<" << (int)bdsGrid_.block_height_log2 << ")";
>> +}
>> +
>> void IPAIPU3::configure(const std::map<uint32_t, ControlInfoMap> &entityControls,
>> - [[maybe_unused]] const Size &bdsOutputSize)
>> + const Size &bdsOutputSize)
>> {
>> if (entityControls.empty())
>> return;
>> @@ -92,11 +157,18 @@ void IPAIPU3::configure(const std::map<uint32_t, ControlInfoMap> &entityControls
>>
>> minExposure_ = std::max(itExp->second.min().get<int32_t>(), 1);
>> maxExposure_ = itExp->second.max().get<int32_t>();
>> - exposure_ = maxExposure_;
>> + exposure_ = minExposure_;
>>
>> minGain_ = std::max(itGain->second.min().get<int32_t>(), 1);
>> maxGain_ = itGain->second.max().get<int32_t>();
>> - gain_ = maxGain_;
>> + gain_ = minGain_;
>> +
>> + params_ = {};
>> +
>> + calculateBdsGrid(bdsOutputSize);
>> +
>> + awbAlgo_ = std::make_unique<ipa::IPU3Awb>();
>> + awbAlgo_->initialise(params_, bdsOutputSize, bdsGrid_);
>> }
>>
>> void IPAIPU3::mapBuffers(const std::vector<IPABuffer> &buffers)
>> @@ -168,10 +240,9 @@ void IPAIPU3::processControls([[maybe_unused]] unsigned int frame,
>>
>> void IPAIPU3::fillParams(unsigned int frame, ipu3_uapi_params *params)
>> {
>> - /* Prepare parameters buffer. */
>> - memset(params, 0, sizeof(*params));
>> + awbAlgo_->updateWbParameters(params_, 1.0);
>>
>> - /* \todo Fill in parameters buffer. */
>> + *params = params_;
>>
>> ipa::ipu3::IPU3Action op;
>> op.op = ipa::ipu3::ActionParamFilled;
>> @@ -184,8 +255,7 @@ void IPAIPU3::parseStatistics(unsigned int frame,
>> {
>> ControlList ctrls(controls::controls);
>>
>> - /* \todo React to statistics and update internal state machine. */
>> - /* \todo Add meta-data information to ctrls. */
>> + awbAlgo_->calculateWBGains(stats);
>>
>> ipa::ipu3::IPU3Action op;
>> op.op = ipa::ipu3::ActionMetadataReady;
>> diff --git a/src/ipa/ipu3/ipu3_awb.cpp b/src/ipa/ipu3/ipu3_awb.cpp
>> new file mode 100644
>> index 00000000..18d19d36
>> --- /dev/null
>> +++ b/src/ipa/ipu3/ipu3_awb.cpp
>> @@ -0,0 +1,235 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2021, Ideas On Board
>> + *
>> + * ipu3_awb.cpp - AWB control algorithm
>> + */
>> +#include "ipu3_awb.h"
>> +
>> +#include <cmath>
>> +#include <numeric>
>> +#include <unordered_map>
>> +
>> +#include "libcamera/internal/log.h"
>> +
>> +namespace libcamera {
>> +
>> +namespace ipa {
>> +
>> +LOG_DEFINE_CATEGORY(IPU3Awb)
>> +
>> +static const struct ipu3_uapi_bnr_static_config imguCssBnrDefaults = {
>> + .wb_gains = { 16, 16, 16, 16 },
>> + .wb_gains_thr = { 255, 255, 255, 255 },
>> + .thr_coeffs = { 1700, 0, 31, 31, 0, 16 },
>> + .thr_ctrl_shd = { 26, 26, 26, 26 },
>> + .opt_center{ -648, 0, -366, 0 },
>> + .lut = {
>> + { 17, 23, 28, 32, 36, 39, 42, 45,
>> + 48, 51, 53, 55, 58, 60, 62, 64,
>> + 66, 68, 70, 72, 73, 75, 77, 78,
>> + 80, 82, 83, 85, 86, 88, 89, 90 } },
>> + .bp_ctrl = { 20, 0, 1, 40, 0, 6, 0, 6, 0 },
>> + .dn_detect_ctrl{ 9, 3, 4, 0, 8, 0, 1, 1, 1, 1, 0 },
>> + .column_size = 1296,
>> + .opt_center_sqr = { 419904, 133956 },
>> +};
>> +
>> +/* settings for Auto White Balance */
>> +static const struct ipu3_uapi_awb_config_s imguCssAwbDefaults = {
>> + .rgbs_thr_gr = 8191,
>> + .rgbs_thr_r = 8191,
>> + .rgbs_thr_gb = 8191,
>> + .rgbs_thr_b = 8191 | IPU3_UAPI_AWB_RGBS_THR_B_EN | IPU3_UAPI_AWB_RGBS_THR_B_INCL_SAT,
>> + .grid = {
>> + .width = 160,
>> + .height = 36,
>> + .block_width_log2 = 3,
>> + .block_height_log2 = 4,
>> + .height_per_slice = 1, /* Overridden by kernel. */
>> + .x_start = 0,
>> + .y_start = 0,
>> + .x_end = 0,
>> + .y_end = 0,
>> + },
>> +};
>> +
>> +static const struct ipu3_uapi_ccm_mat_config imguCssCcmDefault = {
>> + 8191, 0, 0, 0,
>> + 0, 8191, 0, 0,
>> + 0, 0, 8191, 0
>> +};
>> +
>> +IPU3Awb::IPU3Awb()
>> + : Algorithm()
>> +{
>> +}
>> +
>> +IPU3Awb::~IPU3Awb()
>> +{
>> +}
>> +
>> +void IPU3Awb::initialise(ipu3_uapi_params ¶ms, const Size &bdsOutputSize, struct ipu3_uapi_grid_config &bdsGrid)
>> +{
>> + params.use.acc_awb = 1;
>> + params.acc_param.awb.config = imguCssAwbDefaults;
>> +
>> + awbGrid_ = bdsGrid;
>> + params.acc_param.awb.config.grid = awbGrid_;
>> +
>> + params.use.obgrid = 0;
>> + params.obgrid_param.gr = 20;
>> + params.obgrid_param.r = 28;
>> + params.obgrid_param.b = 28;
>> + params.obgrid_param.gb = 20;
>> +
>> + params.use.acc_bnr = 1;
>> + params.acc_param.bnr = imguCssBnrDefaults;
>> + params.acc_param.bnr.opt_center.x_reset = -1 * (bdsOutputSize.width / 2);
>> + params.acc_param.bnr.opt_center.y_reset = -1 * (bdsOutputSize.height / 2);
>> + params.acc_param.bnr.column_size = bdsOutputSize.width;
>> + params.acc_param.bnr.opt_center_sqr.x_sqr_reset = params.acc_param.bnr.opt_center.x_reset * params.acc_param.bnr.opt_center.x_reset;
>> + params.acc_param.bnr.opt_center_sqr.y_sqr_reset = params.acc_param.bnr.opt_center.y_reset * params.acc_param.bnr.opt_center.y_reset;
>> +
>> + params.use.acc_ccm = 1;
>> + params.acc_param.ccm = imguCssCcmDefault;
>> +
>> + params.use.acc_gamma = 1;
>> + params.acc_param.gamma.gc_ctrl.enable = 1;
>> +
>> + params.use.acc_green_disparity = 0;
>> + params.acc_param.green_disparity.gd_black = 2440;
>> + params.acc_param.green_disparity.gd_red = 4;
>> + params.acc_param.green_disparity.gd_blue = 4;
>> + params.acc_param.green_disparity.gd_green = 4;
>> + params.acc_param.green_disparity.gd_shading = 24;
>> + params.acc_param.green_disparity.gd_support = 2;
>> + params.acc_param.green_disparity.gd_clip = 1;
>> + params.acc_param.green_disparity.gd_central_weight = 5;
>> +
>> + params.use.acc_cds = 1;
>> + params.acc_param.cds.csc_en = 1;
>> + params.acc_param.cds.uv_bin_output = 0;
>> + params.acc_param.cds.ds_c00 = 0;
>> + params.acc_param.cds.ds_c01 = 1;
>> + params.acc_param.cds.ds_c02 = 1;
>> + params.acc_param.cds.ds_c03 = 0;
>> + params.acc_param.cds.ds_c10 = 0;
>> + params.acc_param.cds.ds_c11 = 1;
>> + params.acc_param.cds.ds_c12 = 1;
>> + params.acc_param.cds.ds_c13 = 0;
>> + params.acc_param.cds.ds_nf = 2;
>> +
>> + wbGains_[0] = 16;
>> + wbGains_[1] = 4096;
>> + wbGains_[2] = 4096;
>> + wbGains_[3] = 16;
>> +
>> + frame_count_ = 0;
>> +}
>> +
>> +uint32_t IPU3Awb::estimateCCT(uint8_t red, uint8_t green, uint8_t blue)
>> +{
>> + double X = (-0.14282) * (red) + (1.54924) * (green) + (-0.95641) * (blue);
>> + double Y = (-0.32466) * (red) + (1.57837) * (green) + (-0.73191) * (blue);
>> + double Z = (-0.68202) * (red) + (0.77073) * (green) + (0.56332) * (blue);
>> +
>> + double x = X / (X + Y + Z);
>> + double y = Y / (X + Y + Z);
>> +
>> + double n = (x - 0.3320) / (0.1858 - y);
>> + return static_cast<uint32_t>(449 * n * n * n + 3525 * n * n + 6823.3 * n + 5520.33);
>> +}
>> +
>> +double meanValue(std::vector<uint32_t> colorValues)
>> +{
>> + uint32_t count = 0;
>> + uint32_t hist[256] = { 0 };
>> + for (uint32_t const &val : colorValues) {
>> + hist[val]++;
>> + count++;
>> + }
>> +
>> + double mean = 0.0;
>> + for (uint32_t i = 0; i < 256; i++) {
>> + mean += hist[i] * i;
>> + }
>> + return mean /= count;
>> +}
>> +
>> +void IPU3Awb::calculateWBGains(const ipu3_uapi_stats_3a *stats)
>> +{
>
> I suspect we'll want to profile this function carefully sometime.
>
Yes, but no premature optimization :-).
I agree at some point we need to profile globally, and this function
will probably appear as a costly one ;-).
>> + ASSERT(stats->stats_3a_status.awb_en);
>> +
>> + std::vector<uint32_t> redValues, greenValues, blueValues;
>> + const struct ipu3_uapi_grid_config statsAwbGrid = stats->stats_4a_config.awb_config.grid;
>> + Rectangle awbRegion = { statsAwbGrid.x_start,
>> + statsAwbGrid.y_start,
>> + static_cast<unsigned int>(statsAwbGrid.x_end - statsAwbGrid.x_start) + 1,
>> + static_cast<unsigned int>(statsAwbGrid.y_end - statsAwbGrid.y_start) + 1 };
>> +
>> + Point topleft = awbRegion.topLeft();
>> + uint32_t startY = (topleft.y >> awbGrid_.block_height_log2) * awbGrid_.width << awbGrid_.block_width_log2;
>> + uint32_t startX = (topleft.x >> awbGrid_.block_width_log2) << awbGrid_.block_width_log2;
>> + uint32_t endX = (startX + (awbRegion.size().width >> awbGrid_.block_width_log2)) << awbGrid_.block_width_log2;
>> + uint32_t count = 0;
>> + uint32_t i, j;
>> +
>
> Can we reserve the sizes for each of the redValues, greenValues blueValues?
>
> Pushing back each iteration will incur lots of reallocations every time
> the initial reserved size is met ... I bet we're losing quite a bit of
> time here.
Yes, but there is a better option: we don't need the individual cells at
all, only the mean value. So, accumulating the values and dividing at the
end by the number of cells accumulated is lighter.
> I see both Gr Gb greens go into the same pot below, I guess that's expected.
Yes, it is only one green at the end (I did not see any real improvement
when splitting them when I tested, but I can give it another try).
>
>> + awbCounted_ = 0;
>> + for (j = (topleft.y >> awbGrid_.block_height_log2);
>> + j < (topleft.y >> awbGrid_.block_height_log2) + (awbRegion.size().height >> awbGrid_.block_height_log2);
>> + j++) {
>> + for (i = startX + startY; i < endX + startY; i += 8) {
>> + if (stats->awb_raw_buffer.meta_data[i + 4 + j * awbGrid_.width] == 0) {
>> + greenValues.push_back(stats->awb_raw_buffer.meta_data[i + j * awbGrid_.width]);
>> + redValues.push_back(stats->awb_raw_buffer.meta_data[i + 1 + j * awbGrid_.width]);
>> + blueValues.push_back(stats->awb_raw_buffer.meta_data[i + 2 + j * awbGrid_.width]);
>> + greenValues.push_back(stats->awb_raw_buffer.meta_data[i + 3 + j * awbGrid_.width]);
>> + awbCounted_++;
>> + }
>> + count++;
>> + }
>> + }
>> +
>> + double rMean = meanValue(redValues);
>> + double bMean = meanValue(blueValues);
>> + double gMean = meanValue(greenValues);
>
> I'm wondering if we might be able to optimise how we calculate these
> means - but ... I don't want to optimise things before they're profiled,
> and this works (I believe).
Sure we can ;-). It is a leftover from all the moment calculations, I think.
We could store the sums in an internal structure, as is done in
bcm2835_isp_stats_region.
>> +
>> + double rGain = gMean / rMean;
>> + double bGain = gMean / bMean;
>> +
>> + wbGains_[0] = 16;
>> + wbGains_[1] = 4096 * rGain;
>> + wbGains_[2] = 4096 * bGain;
>> + wbGains_[3] = 16;
>> +
>> + frame_count_++;
>> +
>> + cct_ = estimateCCT(rMean, gMean, bMean);
>> +}
>> +
>> +void IPU3Awb::updateWbParameters(ipu3_uapi_params ¶ms, double agcGamma)
>> +{
>> + if ((wbGains_[0] == 0) || (wbGains_[1] == 0) || (wbGains_[2] == 0) || (wbGains_[3] == 0)) {
>> + LOG(IPU3Awb, Error) << "Gains can't be 0, check the stats";
>> + } else {
>> + params.acc_param.bnr.wb_gains.gr = wbGains_[0];
>> + params.acc_param.bnr.wb_gains.r = wbGains_[1];
>> + params.acc_param.bnr.wb_gains.b = wbGains_[2];
>> + params.acc_param.bnr.wb_gains.gb = wbGains_[3];
>> +
>> + LOG(IPU3Awb, Debug) << "Color temperature estimated: " << cct_
>> + << " and gamma calculated: " << agcGamma;
>> + params.acc_param.ccm = imguCssCcmDefault;
>> +
>> + for (uint32_t i = 0; i < 256; i++) {
>> + double j = i / 255.0;
>> + double gamma = std::pow(j, 1.0 / agcGamma);
>> + params.acc_param.gamma.gc_lut.lut[i] = gamma * 8191;
>> + }
>> + }
>> +}
>> +
>> +} /* namespace ipa */
>> +
>> +} /* namespace libcamera */
>> diff --git a/src/ipa/ipu3/ipu3_awb.h b/src/ipa/ipu3/ipu3_awb.h
>> new file mode 100644
>> index 00000000..a14401a0
>> --- /dev/null
>> +++ b/src/ipa/ipu3/ipu3_awb.h
>> @@ -0,0 +1,44 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2021, Ideas On Board
>> + *
>> + * ipu3_awb.h - IPU3 AWB control algorithm
>> + */
>> +#ifndef __LIBCAMERA_IPU3_AWB_H__
>> +#define __LIBCAMERA_IPU3_AWB_H__
>> +
>> +#include <linux/intel-ipu3.h>
>> +
>> +#include <libcamera/geometry.h>
>> +
>> +#include "libipa/algorithm.h"
>> +
>> +namespace libcamera {
>> +
>> +namespace ipa {
>> +
>> +class IPU3Awb : public Algorithm
>> +{
>> +public:
>> + IPU3Awb();
>> + ~IPU3Awb();
>> +
>> + void initialise(ipu3_uapi_params ¶ms, const Size &bdsOutputSize, struct ipu3_uapi_grid_config &bdsGrid);
>> + void calculateWBGains(const ipu3_uapi_stats_3a *stats);
>> + void updateWbParameters(ipu3_uapi_params ¶ms, double agcGamma);
>> +
>> +private:
>> + uint32_t estimateCCT(uint8_t red, uint8_t green, uint8_t blue);
>> +
>> + /* WB calculated gains */
>> + uint16_t wbGains_[4];
>> + uint32_t cct_;
>> + uint32_t awbCounted_;
>> + struct ipu3_uapi_grid_config awbGrid_;
>> + uint32_t frame_count_;
>> +};
>> +
>> +} /* namespace ipa */
>> +
>> +} /* namespace libcamera*/
>> +#endif /* __LIBCAMERA_IPU3_AWB_H__ */
>> diff --git a/src/ipa/ipu3/meson.build b/src/ipa/ipu3/meson.build
>> index a241f617..1040698e 100644
>> --- a/src/ipa/ipu3/meson.build
>> +++ b/src/ipa/ipu3/meson.build
>> @@ -2,8 +2,13 @@
>>
>> ipa_name = 'ipa_ipu3'
>>
>> +ipu3_ipa_sources = files([
>> + 'ipu3.cpp',
>> + 'ipu3_awb.cpp',
>> +])
>> +
>> mod = shared_module(ipa_name,
>> - ['ipu3.cpp', libcamera_generated_ipa_headers],
>> + [ipu3_ipa_sources, libcamera_generated_ipa_headers],
>> name_prefix : '',
>> include_directories : [ipa_includes, libipa_includes],
>> dependencies : libcamera_dep,
>>
>
More information about the libcamera-devel
mailing list