[libcamera-devel] [PATCH v4 3/4] ipa: ipu3: Add support for IPU3 AWB algorithm

Jean-Michel Hautbois jeanmichel.hautbois at ideasonboard.com
Wed Mar 31 07:44:35 CEST 2021


Hi Kieran,

On 31/03/2021 00:48, Kieran Bingham wrote:
> Hi JM
> 
> On 30/03/2021 22:12, Jean-Michel Hautbois wrote:
>> The IPA will locally modify the parameters before they are passed down
>> to the ImgU. Use a local parameter object to give a reference to those
>> algorithms.
>>
>> Inherit from the Algorithm class to implement basic AWB functions.
>>
>> Once AWB is done, a color temperature is estimated and a default CCM matrix
>> will be used (yet to be tuned).
>> Implement a basic "grey-world" AWB algorithm just for demonstration purpose.
>>
>> The BDS output size is passed by the pipeline handler to the IPA.
>> The best grid is then calculated to maximize the number of pixels taken
>> into account in each cells.
>>
>> As commented in the source code, it can be improved, as it has (at least)
>> one limitation: if a cell is big (say 128 pixels wide) and indicated as
>> saturated, it won't be taken into account at all.
>> Maybe is it possible to have a smaller one, at the cost of a few pixels
>> to lose, in which case we can center the grid using the x_start and
>> y_start parameters.
>>
>> Signed-off-by: Jean-Michel Hautbois <jeanmichel.hautbois at ideasonboard.com>
>> ---
>>  src/ipa/ipu3/ipu3.cpp     |  86 ++++++++++++--
>>  src/ipa/ipu3/ipu3_awb.cpp | 235 ++++++++++++++++++++++++++++++++++++++
>>  src/ipa/ipu3/ipu3_awb.h   |  44 +++++++
>>  src/ipa/ipu3/meson.build  |   7 +-
>>  4 files changed, 363 insertions(+), 9 deletions(-)
>>  create mode 100644 src/ipa/ipu3/ipu3_awb.cpp
>>  create mode 100644 src/ipa/ipu3/ipu3_awb.h
>>
>> diff --git a/src/ipa/ipu3/ipu3.cpp b/src/ipa/ipu3/ipu3.cpp
>> index 34a907f2..1cce11c9 100644
>> --- a/src/ipa/ipu3/ipu3.cpp
>> +++ b/src/ipa/ipu3/ipu3.cpp
>> @@ -21,6 +21,11 @@
>>  #include "libcamera/internal/buffer.h"
>>  #include "libcamera/internal/log.h"
>>  
>> +#include "ipu3_awb.h"
>> +
>> +static constexpr uint32_t kMaxCellWidthPerSet = 160;
>> +static constexpr uint32_t kMaxCellHeightPerSet = 80;
>> +
>>  namespace libcamera {
>>  
>>  LOG_DEFINE_CATEGORY(IPAIPU3)
>> @@ -49,6 +54,7 @@ private:
>>  			     const ipu3_uapi_stats_3a *stats);
>>  
>>  	void setControls(unsigned int frame);
>> +	void calculateBdsGrid(const Size &bdsOutputSize);
>>  
>>  	std::map<unsigned int, MappedFrameBuffer> buffers_;
>>  
>> @@ -61,6 +67,14 @@ private:
>>  	uint32_t gain_;
>>  	uint32_t minGain_;
>>  	uint32_t maxGain_;
>> +
>> +	/* Interface to the AWB algorithm */
>> +	std::unique_ptr<ipa::IPU3Awb> awbAlgo_;
>> +
>> +	/* Local parameter storage */
>> +	struct ipu3_uapi_params params_;
>> +
>> +	struct ipu3_uapi_grid_config bdsGrid_;
>>  };
>>  
>>  int IPAIPU3::start()
>> @@ -70,8 +84,59 @@ int IPAIPU3::start()
>>  	return 0;
>>  }
>>  
>> +/**
>> + * This method calculates a grid for the AWB algorithm in the IPU3 firmware.
>> + * Its input is the BDS output size calculated in the ImgU.
>> + * It is limited for now to the simplest method: find the lesser error
>> + * with the width/height and respective log2 width/height of the cells.
>> + *
>> + * \todo The frame is divided into cells which can be 8x8 => 128x128.
>> + * As a smaller cell improves the algorithm precision, adapting the
>> + * x_start and y_start parameters of the grid would provoke a loss of
>> + * some pixels but would also result in more accurate algorithms.
>> + */
>> +void IPAIPU3::calculateBdsGrid(const Size &bdsOutputSize)
>> +{
>> +	uint32_t minError = std::numeric_limits<uint32_t>::max();
>> +	uint32_t bestWidth = 0;
>> +	uint32_t bestHeight = 0;
>> +	uint32_t bestLog2Width = 0;
>> +	uint32_t bestLog2Height = 0;
>> +	bdsGrid_ = {};
>> +
>> +	for (uint32_t widthShift = 3; widthShift <= 7; ++widthShift) {
>> +		uint32_t width = std::min(kMaxCellWidthPerSet,
>> +					  bdsOutputSize.width >> widthShift);
>> +		width = width << widthShift;
>> +		for (uint32_t heightShift = 3; heightShift <= 7; ++heightShift) {
>> +			int32_t height = std::min(kMaxCellHeightPerSet,
>> +						  bdsOutputSize.height >> heightShift);
>> +			height = height << heightShift;
>> +			uint32_t error = std::abs(static_cast<int>(width - bdsOutputSize.width)) + std::abs(static_cast<int>(height - bdsOutputSize.height));
> 
> That's a long line ...

Checkstyle has corrected that one :-/ !

>> +
>> +			if (error > minError)
>> +				continue;
>> +
>> +			minError = error;
>> +			bestWidth = width;
>> +			bestHeight = height;
>> +			bestLog2Width = widthShift;
>> +			bestLog2Height = heightShift;
>> +		}
>> +	}
>> +
>> +	bdsGrid_.width = bestWidth >> bestLog2Width;
>> +	bdsGrid_.block_width_log2 = bestLog2Width;
>> +	bdsGrid_.height = bestHeight >> bestLog2Height;
>> +	bdsGrid_.block_height_log2 = bestLog2Height;
>> +
>> +	LOG(IPAIPU3, Debug) << "Best grid found is: ("
>> +			    << (int)bdsGrid_.width << " << " << (int)bdsGrid_.block_width_log2 << ") x ("
>> +			    << (int)bdsGrid_.height << "<<" << (int)bdsGrid_.block_height_log2 << ")";
>> +}
>> +
>>  void IPAIPU3::configure(const std::map<uint32_t, ControlInfoMap> &entityControls,
>> -			[[maybe_unused]] const Size &bdsOutputSize)
>> +			const Size &bdsOutputSize)
>>  {
>>  	if (entityControls.empty())
>>  		return;
>> @@ -92,11 +157,18 @@ void IPAIPU3::configure(const std::map<uint32_t, ControlInfoMap> &entityControls
>>  
>>  	minExposure_ = std::max(itExp->second.min().get<int32_t>(), 1);
>>  	maxExposure_ = itExp->second.max().get<int32_t>();
>> -	exposure_ = maxExposure_;
>> +	exposure_ = minExposure_;
>>  
>>  	minGain_ = std::max(itGain->second.min().get<int32_t>(), 1);
>>  	maxGain_ = itGain->second.max().get<int32_t>();
>> -	gain_ = maxGain_;
>> +	gain_ = minGain_;
>> +
>> +	params_ = {};
>> +
>> +	calculateBdsGrid(bdsOutputSize);
>> +
>> +	awbAlgo_ = std::make_unique<ipa::IPU3Awb>();
>> +	awbAlgo_->initialise(params_, bdsOutputSize, bdsGrid_);
>>  }
>>  
>>  void IPAIPU3::mapBuffers(const std::vector<IPABuffer> &buffers)
>> @@ -168,10 +240,9 @@ void IPAIPU3::processControls([[maybe_unused]] unsigned int frame,
>>  
>>  void IPAIPU3::fillParams(unsigned int frame, ipu3_uapi_params *params)
>>  {
>> -	/* Prepare parameters buffer. */
>> -	memset(params, 0, sizeof(*params));
>> +	awbAlgo_->updateWbParameters(params_, 1.0);
>>  
>> -	/* \todo Fill in parameters buffer. */
>> +	*params = params_;
>>  
>>  	ipa::ipu3::IPU3Action op;
>>  	op.op = ipa::ipu3::ActionParamFilled;
>> @@ -184,8 +255,7 @@ void IPAIPU3::parseStatistics(unsigned int frame,
>>  {
>>  	ControlList ctrls(controls::controls);
>>  
>> -	/* \todo React to statistics and update internal state machine. */
>> -	/* \todo Add meta-data information to ctrls. */
>> +	awbAlgo_->calculateWBGains(stats);
>>  
>>  	ipa::ipu3::IPU3Action op;
>>  	op.op = ipa::ipu3::ActionMetadataReady;
>> diff --git a/src/ipa/ipu3/ipu3_awb.cpp b/src/ipa/ipu3/ipu3_awb.cpp
>> new file mode 100644
>> index 00000000..18d19d36
>> --- /dev/null
>> +++ b/src/ipa/ipu3/ipu3_awb.cpp
>> @@ -0,0 +1,235 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2021, Ideas On Board
>> + *
>> + * ipu3_awb.cpp - AWB control algorithm
>> + */
>> +#include "ipu3_awb.h"
>> +
>> +#include <cmath>
>> +#include <numeric>
>> +#include <unordered_map>
>> +
>> +#include "libcamera/internal/log.h"
>> +
>> +namespace libcamera {
>> +
>> +namespace ipa {
>> +
>> +LOG_DEFINE_CATEGORY(IPU3Awb)
>> +
>> +static const struct ipu3_uapi_bnr_static_config imguCssBnrDefaults = {
>> +	.wb_gains = { 16, 16, 16, 16 },
>> +	.wb_gains_thr = { 255, 255, 255, 255 },
>> +	.thr_coeffs = { 1700, 0, 31, 31, 0, 16 },
>> +	.thr_ctrl_shd = { 26, 26, 26, 26 },
>> +	.opt_center{ -648, 0, -366, 0 },
>> +	.lut = {
>> +		{ 17, 23, 28, 32, 36, 39, 42, 45,
>> +		  48, 51, 53, 55, 58, 60, 62, 64,
>> +		  66, 68, 70, 72, 73, 75, 77, 78,
>> +		  80, 82, 83, 85, 86, 88, 89, 90 } },
>> +	.bp_ctrl = { 20, 0, 1, 40, 0, 6, 0, 6, 0 },
>> +	.dn_detect_ctrl{ 9, 3, 4, 0, 8, 0, 1, 1, 1, 1, 0 },
>> +	.column_size = 1296,
>> +	.opt_center_sqr = { 419904, 133956 },
>> +};
>> +
>> +/* settings for Auto White Balance */
>> +static const struct ipu3_uapi_awb_config_s imguCssAwbDefaults = {
>> +	.rgbs_thr_gr = 8191,
>> +	.rgbs_thr_r = 8191,
>> +	.rgbs_thr_gb = 8191,
>> +	.rgbs_thr_b = 8191 | IPU3_UAPI_AWB_RGBS_THR_B_EN | IPU3_UAPI_AWB_RGBS_THR_B_INCL_SAT,
>> +	.grid = {
>> +		.width = 160,
>> +		.height = 36,
>> +		.block_width_log2 = 3,
>> +		.block_height_log2 = 4,
>> +		.height_per_slice = 1, /* Overridden by kernel. */
>> +		.x_start = 0,
>> +		.y_start = 0,
>> +		.x_end = 0,
>> +		.y_end = 0,
>> +	},
>> +};
>> +
>> +static const struct ipu3_uapi_ccm_mat_config imguCssCcmDefault = {
>> +	8191, 0, 0, 0,
>> +	0, 8191, 0, 0,
>> +	0, 0, 8191, 0
>> +};
>> +
>> +IPU3Awb::IPU3Awb()
>> +	: Algorithm()
>> +{
>> +}
>> +
>> +IPU3Awb::~IPU3Awb()
>> +{
>> +}
>> +
>> +void IPU3Awb::initialise(ipu3_uapi_params &params, const Size &bdsOutputSize, struct ipu3_uapi_grid_config &bdsGrid)
>> +{
>> +	params.use.acc_awb = 1;
>> +	params.acc_param.awb.config = imguCssAwbDefaults;
>> +
>> +	awbGrid_ = bdsGrid;
>> +	params.acc_param.awb.config.grid = awbGrid_;
>> +
>> +	params.use.obgrid = 0;
>> +	params.obgrid_param.gr = 20;
>> +	params.obgrid_param.r = 28;
>> +	params.obgrid_param.b = 28;
>> +	params.obgrid_param.gb = 20;
>> +
>> +	params.use.acc_bnr = 1;
>> +	params.acc_param.bnr = imguCssBnrDefaults;
>> +	params.acc_param.bnr.opt_center.x_reset = -1 * (bdsOutputSize.width / 2);
>> +	params.acc_param.bnr.opt_center.y_reset = -1 * (bdsOutputSize.height / 2);
>> +	params.acc_param.bnr.column_size = bdsOutputSize.width;
>> +	params.acc_param.bnr.opt_center_sqr.x_sqr_reset = params.acc_param.bnr.opt_center.x_reset * params.acc_param.bnr.opt_center.x_reset;
>> +	params.acc_param.bnr.opt_center_sqr.y_sqr_reset = params.acc_param.bnr.opt_center.y_reset * params.acc_param.bnr.opt_center.y_reset;
>> +
>> +	params.use.acc_ccm = 1;
>> +	params.acc_param.ccm = imguCssCcmDefault;
>> +
>> +	params.use.acc_gamma = 1;
>> +	params.acc_param.gamma.gc_ctrl.enable = 1;
>> +
>> +	params.use.acc_green_disparity = 0;
>> +	params.acc_param.green_disparity.gd_black = 2440;
>> +	params.acc_param.green_disparity.gd_red = 4;
>> +	params.acc_param.green_disparity.gd_blue = 4;
>> +	params.acc_param.green_disparity.gd_green = 4;
>> +	params.acc_param.green_disparity.gd_shading = 24;
>> +	params.acc_param.green_disparity.gd_support = 2;
>> +	params.acc_param.green_disparity.gd_clip = 1;
>> +	params.acc_param.green_disparity.gd_central_weight = 5;
>> +
>> +	params.use.acc_cds = 1;
>> +	params.acc_param.cds.csc_en = 1;
>> +	params.acc_param.cds.uv_bin_output = 0;
>> +	params.acc_param.cds.ds_c00 = 0;
>> +	params.acc_param.cds.ds_c01 = 1;
>> +	params.acc_param.cds.ds_c02 = 1;
>> +	params.acc_param.cds.ds_c03 = 0;
>> +	params.acc_param.cds.ds_c10 = 0;
>> +	params.acc_param.cds.ds_c11 = 1;
>> +	params.acc_param.cds.ds_c12 = 1;
>> +	params.acc_param.cds.ds_c13 = 0;
>> +	params.acc_param.cds.ds_nf = 2;
>> +
>> +	wbGains_[0] = 16;
>> +	wbGains_[1] = 4096;
>> +	wbGains_[2] = 4096;
>> +	wbGains_[3] = 16;
>> +
>> +	frame_count_ = 0;
>> +}
>> +
>> +uint32_t IPU3Awb::estimateCCT(uint8_t red, uint8_t green, uint8_t blue)
>> +{
>> +	double X = (-0.14282) * (red) + (1.54924) * (green) + (-0.95641) * (blue);
>> +	double Y = (-0.32466) * (red) + (1.57837) * (green) + (-0.73191) * (blue);
>> +	double Z = (-0.68202) * (red) + (0.77073) * (green) + (0.56332) * (blue);
>> +
>> +	double x = X / (X + Y + Z);
>> +	double y = Y / (X + Y + Z);
>> +
>> +	double n = (x - 0.3320) / (0.1858 - y);
>> +	return static_cast<uint32_t>(449 * n * n * n + 3525 * n * n + 6823.3 * n + 5520.33);
>> +}
>> +
>> +double meanValue(std::vector<uint32_t> colorValues)
>> +{
>> +	uint32_t count = 0;
>> +	uint32_t hist[256] = { 0 };
>> +	for (uint32_t const &val : colorValues) {
>> +		hist[val]++;
>> +		count++;
>> +	}
>> +
>> +	double mean = 0.0;
>> +	for (uint32_t i = 0; i < 256; i++) {
>> +		mean += hist[i] * i;
>> +	}
>> +	return mean /= count;
>> +}
>> +
>> +void IPU3Awb::calculateWBGains(const ipu3_uapi_stats_3a *stats)
>> +{
> 
> I suspect we'll want to profile this function carefully sometime.
> 

Yes, but no premature optimization :-).
I agree at some point we need to profile globally, and this function
will probably appear as a costly one ;-).

>> +	ASSERT(stats->stats_3a_status.awb_en);
>> +
>> +	std::vector<uint32_t> redValues, greenValues, blueValues;
>> +	const struct ipu3_uapi_grid_config statsAwbGrid = stats->stats_4a_config.awb_config.grid;
>> +	Rectangle awbRegion = { statsAwbGrid.x_start,
>> +				statsAwbGrid.y_start,
>> +				static_cast<unsigned int>(statsAwbGrid.x_end - statsAwbGrid.x_start) + 1,
>> +				static_cast<unsigned int>(statsAwbGrid.y_end - statsAwbGrid.y_start) + 1 };
>> +
>> +	Point topleft = awbRegion.topLeft();
>> +	uint32_t startY = (topleft.y >> awbGrid_.block_height_log2) * awbGrid_.width << awbGrid_.block_width_log2;
>> +	uint32_t startX = (topleft.x >> awbGrid_.block_width_log2) << awbGrid_.block_width_log2;
>> +	uint32_t endX = (startX + (awbRegion.size().width >> awbGrid_.block_width_log2)) << awbGrid_.block_width_log2;
>> +	uint32_t count = 0;
>> +	uint32_t i, j;
>> +
> 
> Can we reserve the sizes for each of the redValues, greenValues blueValues?
> 
> Pushing back each iteration will incur lots of reallocations everytime
> the initial reserved size is met ... I bet we're losing quite a bit of
> time here.

Yes, but there is a better option: we don't need the individual cells at
all, only the mean value. So accumulating the values and dividing at the
end by the number of cells accumulated is lighter.

> I see both Gr Gb greens go into the same pot below, I guess that's expected.

Yes, it is only one green at the end (I did not see any real improvement
when splitting them when I tested, but I can give it another try).

> 
>> +	awbCounted_ = 0;
>> +	for (j = (topleft.y >> awbGrid_.block_height_log2);
>> +	     j < (topleft.y >> awbGrid_.block_height_log2) + (awbRegion.size().height >> awbGrid_.block_height_log2);
>> +	     j++) {
>> +		for (i = startX + startY; i < endX + startY; i += 8) {
>> +			if (stats->awb_raw_buffer.meta_data[i + 4 + j * awbGrid_.width] == 0) {
>> +				greenValues.push_back(stats->awb_raw_buffer.meta_data[i + j * awbGrid_.width]);
>> +				redValues.push_back(stats->awb_raw_buffer.meta_data[i + 1 + j * awbGrid_.width]);
>> +				blueValues.push_back(stats->awb_raw_buffer.meta_data[i + 2 + j * awbGrid_.width]);
>> +				greenValues.push_back(stats->awb_raw_buffer.meta_data[i + 3 + j * awbGrid_.width]);
>> +				awbCounted_++;
>> +			}
>> +			count++;
>> +		}
>> +	}
>> +
>> +	double rMean = meanValue(redValues);
>> +	double bMean = meanValue(blueValues);
>> +	double gMean = meanValue(greenValues);
> 
> I'm wondering if we might be able to optimise how we calculate these
> means - but ... I don't want to optimise things before they're profiled,
> and this works (I believe).

Sure we can ;-). It is a relic from all the moment calculations, I think.
We could store the sums in an internal structure, as is done in
bcm2835_isp_stats_region.

>> +
>> +	double rGain = gMean / rMean;
>> +	double bGain = gMean / bMean;
>> +
>> +	wbGains_[0] = 16;
>> +	wbGains_[1] = 4096 * rGain;
>> +	wbGains_[2] = 4096 * bGain;
>> +	wbGains_[3] = 16;
>> +
>> +	frame_count_++;
>> +
>> +	cct_ = estimateCCT(rMean, gMean, bMean);
>> +}
>> +
>> +void IPU3Awb::updateWbParameters(ipu3_uapi_params &params, double agcGamma)
>> +{
>> +	if ((wbGains_[0] == 0) || (wbGains_[1] == 0) || (wbGains_[2] == 0) || (wbGains_[3] == 0)) {
>> +		LOG(IPU3Awb, Error) << "Gains can't be 0, check the stats";
>> +	} else {
>> +		params.acc_param.bnr.wb_gains.gr = wbGains_[0];
>> +		params.acc_param.bnr.wb_gains.r = wbGains_[1];
>> +		params.acc_param.bnr.wb_gains.b = wbGains_[2];
>> +		params.acc_param.bnr.wb_gains.gb = wbGains_[3];
>> +
>> +		LOG(IPU3Awb, Debug) << "Color temperature estimated: " << cct_
>> +				    << " and gamma calculated: " << agcGamma;
>> +		params.acc_param.ccm = imguCssCcmDefault;
>> +
>> +		for (uint32_t i = 0; i < 256; i++) {
>> +			double j = i / 255.0;
>> +			double gamma = std::pow(j, 1.0 / agcGamma);
>> +			params.acc_param.gamma.gc_lut.lut[i] = gamma * 8191;
>> +		}
>> +	}
>> +}
>> +
>> +} /* namespace ipa */
>> +
>> +} /* namespace libcamera */
>> diff --git a/src/ipa/ipu3/ipu3_awb.h b/src/ipa/ipu3/ipu3_awb.h
>> new file mode 100644
>> index 00000000..a14401a0
>> --- /dev/null
>> +++ b/src/ipa/ipu3/ipu3_awb.h
>> @@ -0,0 +1,44 @@
>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
>> +/*
>> + * Copyright (C) 2021, Ideas On Board
>> + *
>> + * ipu3_awb.h - IPU3 AWB control algorithm
>> + */
>> +#ifndef __LIBCAMERA_IPU3_AWB_H__
>> +#define __LIBCAMERA_IPU3_AWB_H__
>> +
>> +#include <linux/intel-ipu3.h>
>> +
>> +#include <libcamera/geometry.h>
>> +
>> +#include "libipa/algorithm.h"
>> +
>> +namespace libcamera {
>> +
>> +namespace ipa {
>> +
>> +class IPU3Awb : public Algorithm
>> +{
>> +public:
>> +	IPU3Awb();
>> +	~IPU3Awb();
>> +
>> +	void initialise(ipu3_uapi_params &params, const Size &bdsOutputSize, struct ipu3_uapi_grid_config &bdsGrid);
>> +	void calculateWBGains(const ipu3_uapi_stats_3a *stats);
>> +	void updateWbParameters(ipu3_uapi_params &params, double agcGamma);
>> +
>> +private:
>> +	uint32_t estimateCCT(uint8_t red, uint8_t green, uint8_t blue);
>> +
>> +	/* WB calculated gains */
>> +	uint16_t wbGains_[4];
>> +	uint32_t cct_;
>> +	uint32_t awbCounted_;
>> +	struct ipu3_uapi_grid_config awbGrid_;
>> +	uint32_t frame_count_;
>> +};
>> +
>> +} /* namespace ipa */
>> +
>> +} /* namespace libcamera*/
>> +#endif /* __LIBCAMERA_IPU3_AWB_H__ */
>> diff --git a/src/ipa/ipu3/meson.build b/src/ipa/ipu3/meson.build
>> index a241f617..1040698e 100644
>> --- a/src/ipa/ipu3/meson.build
>> +++ b/src/ipa/ipu3/meson.build
>> @@ -2,8 +2,13 @@
>>  
>>  ipa_name = 'ipa_ipu3'
>>  
>> +ipu3_ipa_sources = files([
>> +    'ipu3.cpp',
>> +    'ipu3_awb.cpp',
>> +])
>> +
>>  mod = shared_module(ipa_name,
>> -                    ['ipu3.cpp', libcamera_generated_ipa_headers],
>> +                    [ipu3_ipa_sources, libcamera_generated_ipa_headers],
>>                      name_prefix : '',
>>                      include_directories : [ipa_includes, libipa_includes],
>>                      dependencies : libcamera_dep,
>>
> 


More information about the libcamera-devel mailing list