[libcamera-devel] [PATCH v2 03/10] ipa: raspberrypi: Generalise the ALSC algorithm

Wed Mar 29 01:21:34 CEST 2023

Quoting Naushir Patuck via libcamera-devel (2023-03-27 13:20:23)
> Remove any hard-coded assumptions about the target hardware platform
> from the ALSC algorithm. Instead, use the "target" string provided by
> the camera tuning config and generalised statistics structures to
> determing parameters such as grid and region sizes.
> 
> The ALSC calculations use run-time allocated arrays/vectors on every
> frame. Allocating these might add a non-trivial run-time penalty.
> Replace these dynamic allocations with a set of reusable pre-allocated
> vectors during the init phase.
> 
> Signed-off-by: Naushir Patuck <naush at raspberrypi.com>
> Signed-off-by: David Plowman <david.plowman at raspberrypi.com>
> Reviewed-off-by: David Plowman <david.plowman at raspberrypi.com>

Well this has taken me at least 2 or 3 reads to parse. And I haven't
seen anything specific that worries me overall.

The only thing to check below is
 - Alsc::read using awbRegions

And I almost tripped up on the IIR filter - but I think it's fine, it
was just me mis-reading it.

If the awb region is fine:

Reviewed-by: Kieran Bingham <kieran.bingham at ideasonboard.com>

> ---
>  src/ipa/raspberrypi/controller/alsc_status.h |  13 +-
>  src/ipa/raspberrypi/controller/rpi/alsc.cpp  | 341 +++++++++++--------
>  src/ipa/raspberrypi/controller/rpi/alsc.h    |  29 +-
>  src/ipa/raspberrypi/raspberrypi.cpp          |   9 +-
>  4 files changed, 224 insertions(+), 168 deletions(-)
> 
> diff --git a/src/ipa/raspberrypi/controller/alsc_status.h b/src/ipa/raspberrypi/controller/alsc_status.h
> index e5aa7e37c330..49a9f4a0cb5a 100644
> --- a/src/ipa/raspberrypi/controller/alsc_status.h
> +++ b/src/ipa/raspberrypi/controller/alsc_status.h
> @@ -6,16 +6,17 @@
>   */
>  #pragma once
>  
> +#include <vector>
> +
>  /*
>   * The ALSC algorithm should post the following structure into the image's
>   * "alsc.status" metadata.
>   */
>  
> -constexpr unsigned int AlscCellsX = 16;
> -constexpr unsigned int AlscCellsY = 12;
> -
>  struct AlscStatus {
> -       double r[AlscCellsY][AlscCellsX];
> -       double g[AlscCellsY][AlscCellsX];
> -       double b[AlscCellsY][AlscCellsX];
> +       std::vector<double> r;
> +       std::vector<double> g;
> +       std::vector<double> b;
> +       unsigned int rows;
> +       unsigned int cols;
>  };
> diff --git a/src/ipa/raspberrypi/controller/rpi/alsc.cpp b/src/ipa/raspberrypi/controller/rpi/alsc.cpp
> index eb4e2f9496e1..51fe5d73f52d 100644
> --- a/src/ipa/raspberrypi/controller/rpi/alsc.cpp
> +++ b/src/ipa/raspberrypi/controller/rpi/alsc.cpp
> @@ -5,6 +5,7 @@
>   * alsc.cpp - ALSC (auto lens shading correction) control algorithm
>   */
>  
> +#include <algorithm>
>  #include <functional>
>  #include <math.h>
>  #include <numeric>
> @@ -24,9 +25,6 @@ LOG_DEFINE_CATEGORY(RPiAlsc)
>  
>  #define NAME "rpi.alsc"
>  
> -static const int X = AlscCellsX;
> -static const int Y = AlscCellsY;
> -static const int XY = X * Y;
>  static const double InsufficientData = -1.0;
>  
>  Alsc::Alsc(Controller *controller)
> @@ -51,8 +49,11 @@ char const *Alsc::name() const
>         return NAME;
>  }
>  
> -static int generateLut(double *lut, const libcamera::YamlObject &params)
> +static int generateLut(std::vector<double> &lut, const libcamera::YamlObject &params,
> +                      const Size &size)
>  {
> +       /* These must be signed ints for the co-ordinate calculations below. */
> +       int X = size.width, Y = size.height;
>         double cstrength = params["corner_strength"].get<double>(2.0);
>         if (cstrength <= 1.0) {
>                 LOG(RPiAlsc, Error) << "corner_strength must be > 1.0";
> @@ -81,9 +82,9 @@ static int generateLut(double *lut, const libcamera::YamlObject &params)
>         return 0;
>  }
>  
> -static int readLut(double *lut, const libcamera::YamlObject &params)
> +static int readLut(std::vector<double> &lut, const libcamera::YamlObject &params, const Size &size)
>  {
> -       if (params.size() != XY) {
> +       if (params.size() != size.width * size.height) {
>                 LOG(RPiAlsc, Error) << "Invalid number of entries in LSC table";
>                 return -EINVAL;
>         }
> @@ -101,7 +102,7 @@ static int readLut(double *lut, const libcamera::YamlObject &params)
>  
>  static int readCalibrations(std::vector<AlscCalibration> &calibrations,
>                             const libcamera::YamlObject &params,
> -                           std::string const &name)
> +                           std::string const &name, const Size &size)
>  {
>         if (params.contains(name)) {
>                 double lastCt = 0;
> @@ -119,7 +120,7 @@ static int readCalibrations(std::vector<AlscCalibration> &calibrations,
>                         calibration.ct = lastCt = ct;
>  
>                         const libcamera::YamlObject &table = p["table"];
> -                       if (table.size() != XY) {
> +                       if (table.size() != size.width * size.height) {
>                                 LOG(RPiAlsc, Error)
>                                         << "Incorrect number of values for ct "
>                                         << ct << " in " << name;
> @@ -127,6 +128,7 @@ static int readCalibrations(std::vector<AlscCalibration> &calibrations,
>                         }
>  
>                         int num = 0;
> +                       calibration.table.resize(size.width * size.height);
>                         for (const auto &elem : table.asList()) {
>                                 value = elem.get<double>();
>                                 if (!value)
> @@ -134,7 +136,7 @@ static int readCalibrations(std::vector<AlscCalibration> &calibrations,
>                                 calibration.table[num++] = *value;
>                         }
>  
> -                       calibrations.push_back(calibration);
> +                       calibrations.push_back(std::move(calibration));
>                         LOG(RPiAlsc, Debug)
>                                 << "Read " << name << " calibration for ct " << ct;
>                 }
> @@ -144,6 +146,7 @@ static int readCalibrations(std::vector<AlscCalibration> &calibrations,
>  
>  int Alsc::read(const libcamera::YamlObject &params)
>  {
> +       config_.tableSize = getHardwareConfig().awbRegions;

I suspect this is fine, but is the ALSC always based on the same
regions as the AWB ?

>         config_.framePeriod = params["frame_period"].get<uint16_t>(12);
>         config_.startupFrames = params["startup_frames"].get<uint16_t>(10);
>         config_.speed = params["speed"].get<double>(0.05);
> @@ -153,28 +156,29 @@ int Alsc::read(const libcamera::YamlObject &params)
>         config_.minCount = params["min_count"].get<double>(10.0);
>         config_.minG = params["min_G"].get<uint16_t>(50);
>         config_.omega = params["omega"].get<double>(1.3);
> -       config_.nIter = params["n_iter"].get<uint32_t>(X + Y);
> +       config_.nIter = params["n_iter"].get<uint32_t>(config_.tableSize.width + config_.tableSize.height);
>         config_.luminanceStrength =
>                 params["luminance_strength"].get<double>(1.0);
> -       for (int i = 0; i < XY; i++)
> -               config_.luminanceLut[i] = 1.0;
>  
> +       config_.luminanceLut.resize(config_.tableSize.width * config_.tableSize.height, 1.0);
>         int ret = 0;
>  
>         if (params.contains("corner_strength"))
> -               ret = generateLut(config_.luminanceLut, params);
> +               ret = generateLut(config_.luminanceLut, params, config_.tableSize);
>         else if (params.contains("luminance_lut"))
> -               ret = readLut(config_.luminanceLut, params["luminance_lut"]);
> +               ret = readLut(config_.luminanceLut, params["luminance_lut"], config_.tableSize);
>         else
>                 LOG(RPiAlsc, Warning)
>                         << "no luminance table - assume unity everywhere";
>         if (ret)
>                 return ret;
>  
> -       ret = readCalibrations(config_.calibrationsCr, params, "calibrations_Cr");
> +       ret = readCalibrations(config_.calibrationsCr, params, "calibrations_Cr",
> +                              config_.tableSize);
>         if (ret)
>                 return ret;
> -       ret = readCalibrations(config_.calibrationsCb, params, "calibrations_Cb");
> +       ret = readCalibrations(config_.calibrationsCb, params, "calibrations_Cb",
> +                              config_.tableSize);
>         if (ret)
>                 return ret;
>  
> @@ -187,13 +191,16 @@ int Alsc::read(const libcamera::YamlObject &params)
>  
>  static double getCt(Metadata *metadata, double defaultCt);
>  static void getCalTable(double ct, std::vector<AlscCalibration> const &calibrations,
> -                       double calTable[XY]);
> -static void resampleCalTable(double const calTableIn[XY], CameraMode const &cameraMode,
> -                            double calTableOut[XY]);
> -static void compensateLambdasForCal(double const calTable[XY], double const oldLambdas[XY],
> -                                   double newLambdas[XY]);
> -static void addLuminanceToTables(double results[3][Y][X], double const lambdaR[XY], double lambdaG,
> -                                double const lambdaB[XY], double const luminanceLut[XY],
> +                       std::vector<double> &calTable);
> +static void resampleCalTable(const std::vector<double> &calTableIn, CameraMode const &cameraMode,
> +                            const Size &size, std::vector<double> &calTableOut);
> +static void compensateLambdasForCal(const std::vector<double> &calTable,
> +                                   const std::vector<double> &oldLambdas,
> +                                   std::vector<double> &newLambdas);
> +static void addLuminanceToTables(std::array<std::vector<double>, 3> &results,
> +                                const std::vector<double> &lambdaR, double lambdaG,
> +                                const std::vector<double> &lambdaB,
> +                                const std::vector<double> &luminanceLut,
>                                  double luminanceStrength);
>  
>  void Alsc::initialise()
> @@ -201,7 +208,28 @@ void Alsc::initialise()
>         frameCount2_ = frameCount_ = framePhase_ = 0;
>         firstTime_ = true;
>         ct_ = config_.defaultCt;
> +
> +       const size_t XY = config_.tableSize.width * config_.tableSize.height;
> +
> +       for (auto &r : syncResults_)
> +               r.resize(XY);
> +       for (auto &r : prevSyncResults_)
> +               r.resize(XY);
> +       for (auto &r : asyncResults_)
> +               r.resize(XY);
> +
> +       luminanceTable_.resize(XY);
> +       asyncLambdaR_.resize(XY);
> +       asyncLambdaB_.resize(XY);
>         /* The lambdas are initialised in the SwitchMode. */
> +       lambdaR_.resize(XY);
> +       lambdaB_.resize(XY);
> +
> +       /* Temporaries for the computations, but sensible to allocate this up-front! */
> +       for (auto &c : tmpC_)
> +               c.resize(XY);
> +       for (auto &m : tmpM_)
> +               m.resize(XY);
>  }
>  
>  void Alsc::waitForAysncThread()
> @@ -262,7 +290,7 @@ void Alsc::switchMode(CameraMode const &cameraMode,
>          * We must resample the luminance table like we do the others, but it's
>          * fixed so we can simply do it up front here.
>          */
> -       resampleCalTable(config_.luminanceLut, cameraMode_, luminanceTable_);
> +       resampleCalTable(config_.luminanceLut, cameraMode_, config_.tableSize, luminanceTable_);
>  
>         if (resetTables) {
>                 /*
> @@ -272,18 +300,18 @@ void Alsc::switchMode(CameraMode const &cameraMode,
>                  * the lambdas, but the rest of this code then echoes the code in
>                  * doAlsc, without the adaptive algorithm.
>                  */
> -               for (int i = 0; i < XY; i++)
> -                       lambdaR_[i] = lambdaB_[i] = 1.0;
> -               double calTableR[XY], calTableB[XY], calTableTmp[XY];
> +               std::fill(lambdaR_.begin(), lambdaR_.end(), 1.0);
> +               std::fill(lambdaB_.begin(), lambdaB_.end(), 1.0);
> +               std::vector<double> &calTableR = tmpC_[0], &calTableB = tmpC_[1], &calTableTmp = tmpC_[2];
>                 getCalTable(ct_, config_.calibrationsCr, calTableTmp);
> -               resampleCalTable(calTableTmp, cameraMode_, calTableR);
> +               resampleCalTable(calTableTmp, cameraMode_, config_.tableSize, calTableR);
>                 getCalTable(ct_, config_.calibrationsCb, calTableTmp);
> -               resampleCalTable(calTableTmp, cameraMode_, calTableB);
> +               resampleCalTable(calTableTmp, cameraMode_, config_.tableSize, calTableB);
>                 compensateLambdasForCal(calTableR, lambdaR_, asyncLambdaR_);
>                 compensateLambdasForCal(calTableB, lambdaB_, asyncLambdaB_);
>                 addLuminanceToTables(syncResults_, asyncLambdaR_, 1.0, asyncLambdaB_,
>                                      luminanceTable_, config_.luminanceStrength);
> -               memcpy(prevSyncResults_, syncResults_, sizeof(prevSyncResults_));
> +               prevSyncResults_ = syncResults_;
>                 framePhase_ = config_.framePeriod; /* run the algo again asap */
>                 firstTime_ = false;
>         }
> @@ -294,7 +322,7 @@ void Alsc::fetchAsyncResults()
>         LOG(RPiAlsc, Debug) << "Fetch ALSC results";
>         asyncFinished_ = false;
>         asyncStarted_ = false;
> -       memcpy(syncResults_, asyncResults_, sizeof(syncResults_));
> +       syncResults_ = asyncResults_;
>  }
>  
>  double getCt(Metadata *metadata, double defaultCt)
> @@ -316,9 +344,9 @@ static void copyStats(RgbyRegions &regions, StatisticsPtr &stats,
>         if (!regions.numRegions())
>                 regions.init(stats->awbRegions.size());
>  
> -       double *rTable = (double *)status.r;
> -       double *gTable = (double *)status.g;
> -       double *bTable = (double *)status.b;
> +       const std::vector<double> &rTable = status.r;
> +       const std::vector<double> &gTable = status.g;
> +       const std::vector<double> &bTable = status.b;
>         for (unsigned int i = 0; i < stats->awbRegions.numRegions(); i++) {
>                 auto r = stats->awbRegions.get(i);
>                 r.val.rSum = static_cast<uint64_t>(r.val.rSum / rTable[i]);
> @@ -344,12 +372,9 @@ void Alsc::restartAsync(StatisticsPtr &stats, Metadata *imageMetadata)
>         if (imageMetadata->get("alsc.status", alscStatus) != 0) {
>                 LOG(RPiAlsc, Warning)
>                         << "No ALSC status found for applied gains!";
> -               for (int y = 0; y < Y; y++)
> -                       for (int x = 0; x < X; x++) {
> -                               alscStatus.r[y][x] = 1.0;
> -                               alscStatus.g[y][x] = 1.0;
> -                               alscStatus.b[y][x] = 1.0;
> -                       }
> +               alscStatus.r.resize(config_.tableSize.width * config_.tableSize.height, 1.0);
> +               alscStatus.g.resize(config_.tableSize.width * config_.tableSize.height, 1.0);
> +               alscStatus.b.resize(config_.tableSize.width * config_.tableSize.height, 1.0);
>         }
>         copyStats(statistics_, stats, alscStatus);
>         framePhase_ = 0;
> @@ -380,15 +405,15 @@ void Alsc::prepare(Metadata *imageMetadata)
>                         fetchAsyncResults();
>         }
>         /* Apply IIR filter to results and program into the pipeline. */
> -       double *ptr = (double *)syncResults_,
> -              *pptr = (double *)prevSyncResults_;
> -       for (unsigned int i = 0; i < sizeof(syncResults_) / sizeof(double); i++)
> -               pptr[i] = speed * ptr[i] + (1.0 - speed) * pptr[i];
> +       for (unsigned int j = 0; j < syncResults_.size(); j++) {
> +               for (unsigned int i = 0; i < syncResults_[j].size(); i++)
> +                       prevSyncResults_[j][i] = speed * syncResults_[j][i] + (1.0 - speed) * prevSyncResults_[j][i];

I almost slipped up here thinking this read as "X = Y" ... but I
misread. Definitely using both 'syncResults_' 'prevSyncResults_' so I
think this is fine.

> +       }
>         /* Put output values into status metadata. */
>         AlscStatus status;
> -       memcpy(status.r, prevSyncResults_[0], sizeof(status.r));
> -       memcpy(status.g, prevSyncResults_[1], sizeof(status.g));
> -       memcpy(status.b, prevSyncResults_[2], sizeof(status.b));
> +       status.r = prevSyncResults_[0];
> +       status.g = prevSyncResults_[1];
> +       status.b = prevSyncResults_[2];
>         imageMetadata->set("alsc.status", status);
>  }
>  
> @@ -432,18 +457,17 @@ void Alsc::asyncFunc()
>  }
>  
>  void getCalTable(double ct, std::vector<AlscCalibration> const &calibrations,
> -                double calTable[XY])
> +                std::vector<double> &calTable)
>  {
>         if (calibrations.empty()) {
> -               for (int i = 0; i < XY; i++)
> -                       calTable[i] = 1.0;
> +               std::fill(calTable.begin(), calTable.end(), 1.0);
>                 LOG(RPiAlsc, Debug) << "no calibrations found";
>         } else if (ct <= calibrations.front().ct) {
> -               memcpy(calTable, calibrations.front().table, XY * sizeof(double));
> +               calTable = calibrations.front().table;
>                 LOG(RPiAlsc, Debug) << "using calibration for "
>                                     << calibrations.front().ct;
>         } else if (ct >= calibrations.back().ct) {
> -               memcpy(calTable, calibrations.back().table, XY * sizeof(double));
> +               calTable = calibrations.back().table;
>                 LOG(RPiAlsc, Debug) << "using calibration for "
>                                     << calibrations.back().ct;
>         } else {
> @@ -454,7 +478,7 @@ void getCalTable(double ct, std::vector<AlscCalibration> const &calibrations,
>                 LOG(RPiAlsc, Debug)
>                         << "ct is " << ct << ", interpolating between "
>                         << ct0 << " and " << ct1;
> -               for (int i = 0; i < XY; i++)
> +               for (unsigned int i = 0; i < calTable.size(); i++)
>                         calTable[i] =
>                                 (calibrations[idx].table[i] * (ct1 - ct) +
>                                  calibrations[idx + 1].table[i] * (ct - ct0)) /
> @@ -462,9 +486,13 @@ void getCalTable(double ct, std::vector<AlscCalibration> const &calibrations,
>         }
>  }
>  
> -void resampleCalTable(double const calTableIn[XY],
> -                     CameraMode const &cameraMode, double calTableOut[XY])
> +void resampleCalTable(const std::vector<double> &calTableIn,
> +                     CameraMode const &cameraMode, const Size &size,
> +                     std::vector<double> &calTableOut)
>  {
> +       int X = size.width;
> +       int Y = size.height;
> +
>         /*
>          * Precalculate and cache the x sampling locations and phases to save
>          * recomputing them on every row.
> @@ -501,23 +529,24 @@ void resampleCalTable(double const calTableIn[XY],
>                         yLo = Y - 1 - yLo;
>                         yHi = Y - 1 - yHi;
>                 }
> -               double const *rowAbove = calTableIn + X * yLo;
> -               double const *rowBelow = calTableIn + X * yHi;
> +               double const *rowAbove = calTableIn.data() + X * yLo;
> +               double const *rowBelow = calTableIn.data() + X * yHi;
> +               double *out = calTableOut.data() + X * j;
>                 for (int i = 0; i < X; i++) {
>                         double above = rowAbove[xLo[i]] * (1 - xf[i]) +
>                                        rowAbove[xHi[i]] * xf[i];
>                         double below = rowBelow[xLo[i]] * (1 - xf[i]) +
>                                        rowBelow[xHi[i]] * xf[i];
> -                       *(calTableOut++) = above * (1 - yf) + below * yf;
> +                       *(out++) = above * (1 - yf) + below * yf;
>                 }
>         }
>  }
>  
>  /* Calculate chrominance statistics (R/G and B/G) for each region. */
> -static void calculateCrCb(const RgbyRegions &awbRegion, double cr[XY],
> -                         double cb[XY], uint32_t minCount, uint16_t minG)
> +static void calculateCrCb(const RgbyRegions &awbRegion, std::vector<double> &cr,
> +                         std::vector<double> &cb, uint32_t minCount, uint16_t minG)
>  {
> -       for (int i = 0; i < XY; i++) {
> +       for (unsigned int i = 0; i < cr.size(); i++) {
>                 auto s = awbRegion.get(i);
>  
>                 if (s.counted <= minCount || s.val.gSum / s.counted <= minG) {
> @@ -530,33 +559,34 @@ static void calculateCrCb(const RgbyRegions &awbRegion, double cr[XY],
>         }
>  }
>  
> -static void applyCalTable(double const calTable[XY], double C[XY])
> +static void applyCalTable(const std::vector<double> &calTable, std::vector<double> &C)
>  {
> -       for (int i = 0; i < XY; i++)
> +       for (unsigned int i = 0; i < C.size(); i++)
>                 if (C[i] != InsufficientData)
>                         C[i] *= calTable[i];
>  }
>  
> -void compensateLambdasForCal(double const calTable[XY],
> -                            double const oldLambdas[XY],
> -                            double newLambdas[XY])
> +void compensateLambdasForCal(const std::vector<double> &calTable,
> +                            const std::vector<double> &oldLambdas,
> +                            std::vector<double> &newLambdas)
>  {
>         double minNewLambda = std::numeric_limits<double>::max();
> -       for (int i = 0; i < XY; i++) {
> +       for (unsigned int i = 0; i < newLambdas.size(); i++) {
>                 newLambdas[i] = oldLambdas[i] * calTable[i];
>                 minNewLambda = std::min(minNewLambda, newLambdas[i]);
>         }
> -       for (int i = 0; i < XY; i++)
> +       for (unsigned int i = 0; i < newLambdas.size(); i++)
>                 newLambdas[i] /= minNewLambda;
>  }
>  
> -[[maybe_unused]] static void printCalTable(double const C[XY])
> +[[maybe_unused]] static void printCalTable(const std::vector<double> &C,
> +                                          const Size &size)
>  {
>         printf("table: [\n");
> -       for (int j = 0; j < Y; j++) {
> -               for (int i = 0; i < X; i++) {
> -                       printf("%5.3f", 1.0 / C[j * X + i]);
> -                       if (i != X - 1 || j != Y - 1)
> +       for (unsigned int j = 0; j < size.height; j++) {
> +               for (unsigned int i = 0; i < size.width; i++) {
> +                       printf("%5.3f", 1.0 / C[j * size.width + i]);
> +                       if (i != size.width - 1 || j != size.height - 1)
>                                 printf(",");
>                 }
>                 printf("\n");
> @@ -577,9 +607,13 @@ static double computeWeight(double Ci, double Cj, double sigma)
>  }
>  
>  /* Compute all weights. */
> -static void computeW(double const C[XY], double sigma, double W[XY][4])
> +static void computeW(const std::vector<double> &C, double sigma,
> +                    std::vector<std::array<double, 4>> &W, const Size &size)
>  {
> -       for (int i = 0; i < XY; i++) {
> +       size_t XY = size.width * size.height;
> +       size_t X = size.width;
> +
> +       for (unsigned int i = 0; i < XY; i++) {
>                 /* Start with neighbour above and go clockwise. */
>                 W[i][0] = i >= X ? computeWeight(C[i], C[i - X], sigma) : 0;
>                 W[i][1] = i % X < X - 1 ? computeWeight(C[i], C[i + 1], sigma) : 0;
> @@ -589,11 +623,16 @@ static void computeW(double const C[XY], double sigma, double W[XY][4])
>  }
>  
>  /* Compute M, the large but sparse matrix such that M * lambdas = 0. */
> -static void constructM(double const C[XY], double const W[XY][4],
> -                      double M[XY][4])
> +static void constructM(const std::vector<double> &C,
> +                      const std::vector<std::array<double, 4>> &W,
> +                      std::vector<std::array<double, 4>> &M,
> +                      const Size &size)
>  {
> +       size_t XY = size.width * size.height;
> +       size_t X = size.width;
> +
>         double epsilon = 0.001;
> -       for (int i = 0; i < XY; i++) {
> +       for (unsigned int i = 0; i < XY; i++) {
>                 /*
>                  * Note how, if C[i] == INSUFFICIENT_DATA, the weights will all
>                  * be zero so the equation is still set up correctly.
> @@ -614,79 +653,80 @@ static void constructM(double const C[XY], double const W[XY][4],
>   * left/right neighbours are zero down the left/right edges, so we don't need
>   * need to test the i value to exclude them.
>   */
> -static double computeLambdaBottom(int i, double const M[XY][4],
> -                                 double lambda[XY])
> +static double computeLambdaBottom(int i, const std::vector<std::array<double, 4>> &M,
> +                                 std::vector<double> &lambda, const Size &size)
>  {
> -       return M[i][1] * lambda[i + 1] + M[i][2] * lambda[i + X] +
> +       return M[i][1] * lambda[i + 1] + M[i][2] * lambda[i + size.width] +
>                M[i][3] * lambda[i - 1];
>  }
> -static double computeLambdaBottomStart(int i, double const M[XY][4],
> -                                      double lambda[XY])
> +static double computeLambdaBottomStart(int i, const std::vector<std::array<double, 4>> &M,
> +                                      std::vector<double> &lambda, const Size &size)
>  {
> -       return M[i][1] * lambda[i + 1] + M[i][2] * lambda[i + X];
> +       return M[i][1] * lambda[i + 1] + M[i][2] * lambda[i + size.width];
>  }
> -static double computeLambdaInterior(int i, double const M[XY][4],
> -                                   double lambda[XY])
> +static double computeLambdaInterior(int i, const std::vector<std::array<double, 4>> &M,
> +                                   std::vector<double> &lambda, const Size &size)
>  {
> -       return M[i][0] * lambda[i - X] + M[i][1] * lambda[i + 1] +
> -              M[i][2] * lambda[i + X] + M[i][3] * lambda[i - 1];
> +       return M[i][0] * lambda[i - size.width] + M[i][1] * lambda[i + 1] +
> +              M[i][2] * lambda[i + size.width] + M[i][3] * lambda[i - 1];
>  }
> -static double computeLambdaTop(int i, double const M[XY][4],
> -                              double lambda[XY])
> +static double computeLambdaTop(int i, const std::vector<std::array<double, 4>> &M,
> +                              std::vector<double> &lambda, const Size &size)
>  {
> -       return M[i][0] * lambda[i - X] + M[i][1] * lambda[i + 1] +
> +       return M[i][0] * lambda[i - size.width] + M[i][1] * lambda[i + 1] +
>                M[i][3] * lambda[i - 1];
>  }
> -static double computeLambdaTopEnd(int i, double const M[XY][4],
> -                                 double lambda[XY])
> +static double computeLambdaTopEnd(int i, const std::vector<std::array<double, 4>> &M,
> +                                 std::vector<double> &lambda, const Size &size)
>  {
> -       return M[i][0] * lambda[i - X] + M[i][3] * lambda[i - 1];
> +       return M[i][0] * lambda[i - size.width] + M[i][3] * lambda[i - 1];
>  }
>  
>  /* Gauss-Seidel iteration with over-relaxation. */
> -static double gaussSeidel2Sor(double const M[XY][4], double omega,
> -                             double lambda[XY], double lambdaBound)
> +static double gaussSeidel2Sor(const std::vector<std::array<double, 4>> &M, double omega,
> +                             std::vector<double> &lambda, double lambdaBound,
> +                             const Size &size)
>  {
> +       int XY = size.width * size.height;
> +       int X = size.width;
>         const double min = 1 - lambdaBound, max = 1 + lambdaBound;
> -       double oldLambda[XY];
> +       std::vector<double> oldLambda = lambda;
>         int i;
> -       for (i = 0; i < XY; i++)
> -               oldLambda[i] = lambda[i];
> -       lambda[0] = computeLambdaBottomStart(0, M, lambda);
> +       lambda[0] = computeLambdaBottomStart(0, M, lambda, size);
>         lambda[0] = std::clamp(lambda[0], min, max);
>         for (i = 1; i < X; i++) {
> -               lambda[i] = computeLambdaBottom(i, M, lambda);
> +               lambda[i] = computeLambdaBottom(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
>         for (; i < XY - X; i++) {
> -               lambda[i] = computeLambdaInterior(i, M, lambda);
> +               lambda[i] = computeLambdaInterior(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
>         for (; i < XY - 1; i++) {
> -               lambda[i] = computeLambdaTop(i, M, lambda);
> +               lambda[i] = computeLambdaTop(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
> -       lambda[i] = computeLambdaTopEnd(i, M, lambda);
> +       lambda[i] = computeLambdaTopEnd(i, M, lambda, size);
>         lambda[i] = std::clamp(lambda[i], min, max);
>         /*
>          * Also solve the system from bottom to top, to help spread the updates
>          * better.
>          */
> -       lambda[i] = computeLambdaTopEnd(i, M, lambda);
> +       lambda[i] = computeLambdaTopEnd(i, M, lambda, size);
>         lambda[i] = std::clamp(lambda[i], min, max);
>         for (i = XY - 2; i >= XY - X; i--) {
> -               lambda[i] = computeLambdaTop(i, M, lambda);
> +               lambda[i] = computeLambdaTop(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
>         for (; i >= X; i--) {
> -               lambda[i] = computeLambdaInterior(i, M, lambda);
> +               lambda[i] = computeLambdaInterior(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
>         for (; i >= 1; i--) {
> -               lambda[i] = computeLambdaBottom(i, M, lambda);
> +               lambda[i] = computeLambdaBottom(i, M, lambda, size);
>                 lambda[i] = std::clamp(lambda[i], min, max);
>         }
> -       lambda[0] = computeLambdaBottomStart(0, M, lambda);
> +       lambda[0] = computeLambdaBottomStart(0, M, lambda, size);
>         lambda[0] = std::clamp(lambda[0], min, max);
>         double maxDiff = 0;
>         for (i = 0; i < XY; i++) {
> @@ -698,33 +738,33 @@ static double gaussSeidel2Sor(double const M[XY][4], double omega,
>  }
>  
>  /* Normalise the values so that the smallest value is 1. */
> -static void normalise(double *ptr, size_t n)
> +static void normalise(std::vector<double> &results)
>  {
> -       double minval = ptr[0];
> -       for (size_t i = 1; i < n; i++)
> -               minval = std::min(minval, ptr[i]);
> -       for (size_t i = 0; i < n; i++)
> -               ptr[i] /= minval;
> +       double minval = *std::min_element(results.begin(), results.end());
> +       std::for_each(results.begin(), results.end(),
> +                     [minval](double val) { return val / minval; });
>  }
>  
>  /* Rescale the values so that the average value is 1. */
> -static void reaverage(Span<double> data)
> +static void reaverage(std::vector<double> &data)
>  {
>         double sum = std::accumulate(data.begin(), data.end(), 0.0);
>         double ratio = 1 / (sum / data.size());
> -       for (double &d : data)
> -               d *= ratio;
> +       std::for_each(data.begin(), data.end(),
> +                     [ratio](double val) { return val * ratio; });
>  }
>  
> -static void runMatrixIterations(double const C[XY], double lambda[XY],
> -                               double const W[XY][4], double omega,
> -                               int nIter, double threshold, double lambdaBound)
> +static void runMatrixIterations(const std::vector<double> &C,
> +                               std::vector<double> &lambda,
> +                               const std::vector<std::array<double, 4>> &W,
> +                               std::vector<std::array<double, 4>> &M, double omega,
> +                               unsigned int nIter, double threshold, double lambdaBound,
> +                               const Size &size)
>  {
> -       double M[XY][4];
> -       constructM(C, W, M);
> +       constructM(C, W, M, size);
>         double lastMaxDiff = std::numeric_limits<double>::max();
> -       for (int i = 0; i < nIter; i++) {
> -               double maxDiff = fabs(gaussSeidel2Sor(M, omega, lambda, lambdaBound));
> +       for (unsigned int i = 0; i < nIter; i++) {
> +               double maxDiff = fabs(gaussSeidel2Sor(M, omega, lambda, lambdaBound, size));
>                 if (maxDiff < threshold) {
>                         LOG(RPiAlsc, Debug)
>                                 << "Stop after " << i + 1 << " iterations";
> @@ -741,39 +781,44 @@ static void runMatrixIterations(double const C[XY], double lambda[XY],
>                 lastMaxDiff = maxDiff;
>         }
>         /* We're going to normalise the lambdas so the total average is 1. */
> -       reaverage({ lambda, XY });
> +       reaverage(lambda);

Somehow I thought lambda was a reserved keyword, but it doesn't seem to
be.

>  }
>  
> -static void addLuminanceRb(double result[XY], double const lambda[XY],
> -                          double const luminanceLut[XY],
> +static void addLuminanceRb(std::vector<double> &result, const std::vector<double> &lambda,
> +                          const std::vector<double> &luminanceLut,
>                            double luminanceStrength)
>  {
> -       for (int i = 0; i < XY; i++)
> +       for (unsigned int i = 0; i < result.size(); i++)
>                 result[i] = lambda[i] * ((luminanceLut[i] - 1) * luminanceStrength + 1);
>  }
>  
> -static void addLuminanceG(double result[XY], double lambda,
> -                         double const luminanceLut[XY],
> +static void addLuminanceG(std::vector<double> &result, double lambda,
> +                         const std::vector<double> &luminanceLut,
>                           double luminanceStrength)
>  {
> -       for (int i = 0; i < XY; i++)
> +       for (unsigned int i = 0; i < result.size(); i++)
>                 result[i] = lambda * ((luminanceLut[i] - 1) * luminanceStrength + 1);
>  }
>  
> -void addLuminanceToTables(double results[3][Y][X], double const lambdaR[XY],
> -                         double lambdaG, double const lambdaB[XY],
> -                         double const luminanceLut[XY],
> +void addLuminanceToTables(std::array<std::vector<double>, 3> &results,
> +                         const std::vector<double> &lambdaR,
> +                         double lambdaG, const std::vector<double> &lambdaB,
> +                         const std::vector<double> &luminanceLut,
>                           double luminanceStrength)
>  {
> -       addLuminanceRb((double *)results[0], lambdaR, luminanceLut, luminanceStrength);
> -       addLuminanceG((double *)results[1], lambdaG, luminanceLut, luminanceStrength);
> -       addLuminanceRb((double *)results[2], lambdaB, luminanceLut, luminanceStrength);
> -       normalise((double *)results, 3 * XY);
> +       addLuminanceRb(results[0], lambdaR, luminanceLut, luminanceStrength);
> +       addLuminanceG(results[1], lambdaG, luminanceLut, luminanceStrength);
> +       addLuminanceRb(results[2], lambdaB, luminanceLut, luminanceStrength);
> +       for (auto &r : results)
> +               normalise(r);
>  }
>  
>  void Alsc::doAlsc()
>  {
> -       double cr[XY], cb[XY], wr[XY][4], wb[XY][4], calTableR[XY], calTableB[XY], calTableTmp[XY];
> +       std::vector<double> &cr = tmpC_[0], &cb = tmpC_[1], &calTableR = tmpC_[2],
> +                           &calTableB = tmpC_[3], &calTableTmp = tmpC_[4];
> +       std::vector<std::array<double, 4>> &wr = tmpM_[0], &wb = tmpM_[1], &M = tmpM_[2];
> +
>         /*
>          * Calculate our R/B ("Cr"/"Cb") colour statistics, and assess which are
>          * usable.
> @@ -784,9 +829,9 @@ void Alsc::doAlsc()
>          * case the camera mode is not full-frame.
>          */
>         getCalTable(ct_, config_.calibrationsCr, calTableTmp);
> -       resampleCalTable(calTableTmp, cameraMode_, calTableR);
> +       resampleCalTable(calTableTmp, cameraMode_, config_.tableSize, calTableR);
>         getCalTable(ct_, config_.calibrationsCb, calTableTmp);
> -       resampleCalTable(calTableTmp, cameraMode_, calTableB);
> +       resampleCalTable(calTableTmp, cameraMode_, config_.tableSize, calTableB);
>         /*
>          * You could print out the cal tables for this image here, if you're
>          * tuning the algorithm...
> @@ -796,13 +841,13 @@ void Alsc::doAlsc()
>         applyCalTable(calTableR, cr);
>         applyCalTable(calTableB, cb);
>         /* Compute weights between zones. */
> -       computeW(cr, config_.sigmaCr, wr);
> -       computeW(cb, config_.sigmaCb, wb);
> +       computeW(cr, config_.sigmaCr, wr, config_.tableSize);
> +       computeW(cb, config_.sigmaCb, wb, config_.tableSize);
>         /* Run Gauss-Seidel iterations over the resulting matrix, for R and B. */
> -       runMatrixIterations(cr, lambdaR_, wr, config_.omega, config_.nIter,
> -                           config_.threshold, config_.lambdaBound);
> -       runMatrixIterations(cb, lambdaB_, wb, config_.omega, config_.nIter,
> -                           config_.threshold, config_.lambdaBound);
> +       runMatrixIterations(cr, lambdaR_, wr, M, config_.omega, config_.nIter,
> +                           config_.threshold, config_.lambdaBound, config_.tableSize);
> +       runMatrixIterations(cb, lambdaB_, wb, M, config_.omega, config_.nIter,
> +                           config_.threshold, config_.lambdaBound, config_.tableSize);
>         /*
>          * Fold the calibrated gains into our final lambda values. (Note that on
>          * the next run, we re-start with the lambda values that don't have the
> diff --git a/src/ipa/raspberrypi/controller/rpi/alsc.h b/src/ipa/raspberrypi/controller/rpi/alsc.h
> index 9167c9ffa2e3..85e998db40e9 100644
> --- a/src/ipa/raspberrypi/controller/rpi/alsc.h
> +++ b/src/ipa/raspberrypi/controller/rpi/alsc.h
> @@ -6,9 +6,13 @@
>   */
>  #pragma once
>  
> +#include <array>
>  #include <mutex>
>  #include <condition_variable>
>  #include <thread>
> +#include <vector>
> +
> +#include <libcamera/geometry.h>
>  
>  #include "../algorithm.h"
>  #include "../alsc_status.h"
> @@ -20,7 +24,7 @@ namespace RPiController {
>  
>  struct AlscCalibration {
>         double ct;
> -       double table[AlscCellsX * AlscCellsY];
> +       std::vector<double> table;
>  };
>  
>  struct AlscConfig {
> @@ -36,13 +40,14 @@ struct AlscConfig {
>         uint16_t minG;
>         double omega;
>         uint32_t nIter;
> -       double luminanceLut[AlscCellsX * AlscCellsY];
> +       std::vector<double> luminanceLut;
>         double luminanceStrength;
>         std::vector<AlscCalibration> calibrationsCr;
>         std::vector<AlscCalibration> calibrationsCb;
>         double defaultCt; /* colour temperature if no metadata found */
>         double threshold; /* iteration termination threshold */
>         double lambdaBound; /* upper/lower bound for lambda from a value of 1 */
> +       libcamera::Size tableSize;
>  };
>  
>  class Alsc : public Algorithm
> @@ -62,7 +67,7 @@ private:
>         AlscConfig config_;
>         bool firstTime_;
>         CameraMode cameraMode_;
> -       double luminanceTable_[AlscCellsX * AlscCellsY];
> +       std::vector<double> luminanceTable_;
>         std::thread asyncThread_;
>         void asyncFunc(); /* asynchronous thread function */
>         std::mutex mutex_;
> @@ -88,8 +93,8 @@ private:
>         int frameCount_;
>         /* counts up to startupFrames for Process function */
>         int frameCount2_;
> -       double syncResults_[3][AlscCellsY][AlscCellsX];
> -       double prevSyncResults_[3][AlscCellsY][AlscCellsX];
> +       std::array<std::vector<double>, 3> syncResults_;
> +       std::array<std::vector<double>, 3> prevSyncResults_;
>         void waitForAysncThread();
>         /*
>          * The following are for the asynchronous thread to use, though the main
> @@ -100,12 +105,16 @@ private:
>         void fetchAsyncResults();
>         double ct_;
>         RgbyRegions statistics_;
> -       double asyncResults_[3][AlscCellsY][AlscCellsX];
> -       double asyncLambdaR_[AlscCellsX * AlscCellsY];
> -       double asyncLambdaB_[AlscCellsX * AlscCellsY];
> +       std::array<std::vector<double>, 3> asyncResults_;
> +       std::vector<double> asyncLambdaR_;
> +       std::vector<double> asyncLambdaB_;
>         void doAlsc();
> -       double lambdaR_[AlscCellsX * AlscCellsY];
> -       double lambdaB_[AlscCellsX * AlscCellsY];
> +       std::vector<double> lambdaR_;
> +       std::vector<double> lambdaB_;
> +
> +       /* Temporaries for the computations */
> +       std::array<std::vector<double>, 5> tmpC_;
> +       std::array<std::vector<std::array<double, 4>>, 3> tmpM_;
>  };
>  
>  } /* namespace RPiController */
> diff --git a/src/ipa/raspberrypi/raspberrypi.cpp b/src/ipa/raspberrypi/raspberrypi.cpp
> index b64cb96e2dde..257d862e07b6 100644
> --- a/src/ipa/raspberrypi/raspberrypi.cpp
> +++ b/src/ipa/raspberrypi/raspberrypi.cpp
> @@ -14,6 +14,7 @@
>  #include <stdint.h>
>  #include <string.h>
>  #include <sys/mman.h>
> +#include <vector>
>  
>  #include <linux/bcm2835-isp.h>
>  
> @@ -174,7 +175,7 @@ private:
>         void applyDPC(const struct DpcStatus *dpcStatus, ControlList &ctrls);
>         void applyLS(const struct AlscStatus *lsStatus, ControlList &ctrls);
>         void applyAF(const struct AfStatus *afStatus, ControlList &lensCtrls);
> -       void resampleTable(uint16_t dest[], double const src[12][16], int destW, int destH);
> +       void resampleTable(uint16_t dest[], const std::vector<double> &src, int destW, int destH);
>  
>         std::map<unsigned int, MappedFrameBuffer> buffers_;
>  
> @@ -1768,7 +1769,7 @@ void IPARPi::applyAF(const struct AfStatus *afStatus, ControlList &lensCtrls)
>   * Resamples a 16x12 table with central sampling to destW x destH with corner
>   * sampling.
>   */
> -void IPARPi::resampleTable(uint16_t dest[], double const src[12][16],
> +void IPARPi::resampleTable(uint16_t dest[], const std::vector<double> &src,
>                            int destW, int destH)
>  {
>         /*
> @@ -1793,8 +1794,8 @@ void IPARPi::resampleTable(uint16_t dest[], double const src[12][16],
>                 double yf = y - yLo;
>                 int yHi = yLo < 11 ? yLo + 1 : 11;
>                 yLo = yLo > 0 ? yLo : 0;
> -               double const *rowAbove = src[yLo];
> -               double const *rowBelow = src[yHi];
> +               double const *rowAbove = src.data() + yLo * 16;
> +               double const *rowBelow = src.data() + yHi * 16;

Is this 'stride' always going to be 16?

>                 for (int i = 0; i < destW; i++) {
>                         double above = rowAbove[xLo[i]] * (1 - xf[i]) + rowAbove[xHi[i]] * xf[i];
>                         double below = rowBelow[xLo[i]] * (1 - xf[i]) + rowBelow[xHi[i]] * xf[i];
> -- 
> 2.34.1
>