[PATCH v2] libcamera: utils: Add a helper to convert floating-point to fixed-point

Thu Apr 25 17:38:43 CEST 2024

Hi Paul,

what else could go wrong...

On Thu, Apr 25, 2024 at 11:09:10AM +0200, Stefan Klug wrote:
> On Wed, Apr 24, 2024 at 08:17:30PM +0200, Stefan Klug wrote:
> > Hi Paul,
> > 
> > thanks for the patch.
> > 
> > On Wed, Apr 24, 2024 at 04:44:09PM +0900, Paul Elder wrote:
> > > Add helper functions for converting between floating point and fixed
> > > point numbers. Also add tests for them.
> > > 
> > > Signed-off-by: Paul Elder <paul.elder at ideasonboard.com>
> > > ---
> > > Changes in v2:
> > > - added the reverse conversion function (fixed -> floating)
> > > - added tests
> > > - make the conversion code cleaner
> > > ---
> > >  include/libcamera/base/utils.h | 37 +++++++++++++++++++++++++
> > >  src/libcamera/base/utils.cpp   | 20 ++++++++++++++
> > >  test/utils.cpp                 | 49 ++++++++++++++++++++++++++++++++++
> > >  3 files changed, 106 insertions(+)
> > > 
> > > diff --git a/include/libcamera/base/utils.h b/include/libcamera/base/utils.h
> > > index 37d9af60..da0767e3 100644
> > > --- a/include/libcamera/base/utils.h
> > > +++ b/include/libcamera/base/utils.h
> > > @@ -9,6 +9,7 @@
> > >  
> > >  #include <algorithm>
> > >  #include <chrono>
> > > +#include <cmath>
> > >  #include <iterator>
> > >  #include <memory>
> > >  #include <ostream>
> > > @@ -369,6 +370,42 @@ decltype(auto) abs_diff(const T &a, const T &b)
> > >  
> > >  double strtod(const char *__restrict nptr, char **__restrict endptr);
> > >  
> > > +#ifndef __DOXYGEN__
> > > +template<unsigned int I, unsigned int F, typename R, typename T,
> > > +	 std::enable_if_t<std::is_integral_v<R> &&
> > > +			  std::is_floating_point_v<T>> * = nullptr>
> > > +#else
> > > +template<unsigned int I, unsigned int F, typename R, typename T>
> > > +#endif
> > > +constexpr R floatingToFixedPoint(T number)
> > > +{
> > > +	static_assert(I + F <= sizeof(R) * 8);
> > > +
> > > +	R maskI = (1 << I) - 1;
> > > +	R whole = (static_cast<R>(number) & maskI) << F;
> > > +
> > > +	R maskF = (1 << F) - 1;
> > > +	R frac = static_cast<R>(std::round(number * (1 << F))) & maskF;
> > > +
> > > +	return whole | frac;
> > > +}
> > > +
> > > +#ifndef __DOXYGEN__
> > > +template<unsigned int I, unsigned int F, typename R, typename T,
> > > +	 std::enable_if_t<std::is_floating_point_v<R> &&
> > > +			  std::is_integral_v<T>> * = nullptr>
> > > +#else
> > > +template<unsigned int I, unsigned int F, typename R, typename T>
> > > +#endif
> > > +constexpr R fixedToFloatingPoint(T number)
> > > +{
> > > +	static_assert(I + F <= sizeof(T) * 8);
> > > +
> > > +	int coeff = number >> (I + F - 1) ? -1 : 1;
> > > +
> > > +	return coeff * static_cast<R>(number) / static_cast<R>(1 << F);
> > > +}
> > > +
> > >  } /* namespace utils */
> > >  
> > >  #ifndef __DOXYGEN__
> > > diff --git a/src/libcamera/base/utils.cpp b/src/libcamera/base/utils.cpp
> > > index 3b73b442..ba36026d 100644
> > > --- a/src/libcamera/base/utils.cpp
> > > +++ b/src/libcamera/base/utils.cpp
> > > @@ -521,6 +521,26 @@ double strtod(const char *__restrict nptr, char **__restrict endptr)
> > >  #endif
> > >  }
> > >  
> > > +/**
> > > + * \fn R floatingToFixedPoint(T number)
> > > + * \brief Convert a floating point number to a fixed-point representation
> > > + * \tparam I Bit width of the integer part of the fixed-point
> > > + * \tparam F Bit width of the fractional part of the fixed-point
> > > + * \tparam R Return type of the fixed-point representation
> > > + * \tparam T Input type of the floating point representation
> > > + * \return The converted value
> > > + */
> > > +
> > > +/**
> > > + * \fn R fixedToFloatingPoint(T number)
> > > + * \brief Convert a fixed-point number to a floating point representation
> > > + * \tparam I Bit width of the integer part of the fixed-point
> > > + * \tparam F Bit width of the fractional part of the fixed-point
> > > + * \tparam R Return type of the floating point representation
> > > + * \tparam T Input type of the fixed-point representation
> > > + * \return The converted value
> > > + */
> > > +
> > >  } /* namespace utils */
> > >  
> > >  #ifndef __DOXYGEN__
> > > diff --git a/test/utils.cpp b/test/utils.cpp
> > > index fc56e14e..f1805207 100644
> > > --- a/test/utils.cpp
> > > +++ b/test/utils.cpp
> > > @@ -170,6 +170,51 @@ protected:
> > >  		return TestPass;
> > >  	}
> > >  
> > > +	template<unsigned int intPrec, unsigned fracPrec, typename T>
> > > +	int testSingleFixedPoint(double input, T expected)
> > > +	{
> > > +		T ret = utils::floatingToFixedPoint<intPrec, fracPrec, T>(input);
> > > +		if (ret != expected) {
> > > +			cerr << "Expected " << input << " to convert to "
> > > +			     << expected << ", got " << ret << std::endl;
> > > +			return TestFail;
> > > +		}
> > > +
> > > +		/*
> > > +		 * The precision check is fairly arbitrary but is based on what
> > > +		 * the rkisp1 is capable of in the crosstalk module.
> > > +		 */
> > > +		double f = utils::fixedToFloatingPoint<intPrec, fracPrec, double>(ret);
> > > +		if (std::abs(f - input) > 0.001) {
> > > +			cerr << "Reverse conversion expected " << ret
> > > +			     << " to convert to " << input
> > > +			     << ", got " << f << std::endl;
> > > +			return TestFail;
> > > +		}
> > > +
> > > +		return TestPass;
> > > +	}
> > > +
> > > +	int testFixedPoint()
> > > +	{
> > > +		/* These are the only cases that we know for certain */
> > > +		std::map<double, uint16_t> testCases = {
> > > +			{ 7.992, 0x3FF },
> > > +			{ -8, 0x400 },
> > > +			{ 0, 0 },
> > > +		};
> > 
> > Great to have a testcase :-) Unfortunately I still believe there is
> > something not exactly right here. As the implementation differed from
> > what I had in mind regarding fixed points I played a bit with it...  I
> > guess these testvalues where taken from the rkisp header. They all work
> > well with this implementation, but they miss the interesting point of
> > negative values (-0.4, -1.4) where two's complement gets interesting. In
> > that case the current implementation fails in the conversion back to
> > float. I'm not sure if vsi implemented a special fixed point format but
> > I actually don't expect that.
> > 
> > I did a testprogram that shows the differences:
> > 
> > ---------- main.cpp
> > #include <stdio.h>
> > #include <algorithm>
> > #include <chrono>
> > #include <cmath>
> > #include <iterator>
> > #include <memory>
> > #include <ostream>
> > #include <iostream>
> > #include <type_traits>
> > #include <bitset>
> > 
> > template<unsigned int I, unsigned int F, typename R, typename T,
> > 	 std::enable_if_t<std::is_integral_v<R> &&
> > 			  std::is_floating_point_v<T>> * = nullptr>
> > constexpr R floatingToFixedPoint(T number)
> > {
> > 	static_assert(I + F <= sizeof(R) * 8);
> > 
> > 	R maskI = (1 << I) - 1;
> > 	R whole = (static_cast<R>(number) & maskI) << F;
> > 
> > 	R maskF = (1 << F) - 1;
> > 	R frac = static_cast<R>(std::round(number * (1 << F))) & maskF;
> > 
> > 	return whole | frac;
> > }
> > 
> > template<unsigned int I, unsigned int F, typename R, typename T,
> > 	 std::enable_if_t<std::is_integral_v<R> &&
> > 			  std::is_floating_point_v<T>> * = nullptr>
> > constexpr R floatingToFixedPoint2(T number)
> > {
> > 	static_assert(I + F <= sizeof(R) * 8);
> > 
> > 	R mask = (1 << (F + I)) - 1;
> > 	R frac = static_cast<R>(std::round(number * (1 << (F)))) & mask;

... turns out, this doesn't work on arm ... 
See https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/

This version works for me:

See template<unsigned int I, unsigned int F, typename R, typename T,
	 std::enable_if_t<std::is_integral_v<R> &&
			  std::is_floating_point_v<T>> * = nullptr>
constexpr R floatingToFixedPoint2(T number)
{
	static_assert(sizeof(int) >= sizeof(R));
	static_assert(I + F <= sizeof(R) * 8);

	R mask = (1 << (F + I)) - 1;
	/* 
	* The inbetween cast to int is needed on arm platforms to properly
	* cast negative values. See
	* https://embeddeduse.com/2013/08/25/casting-a-negative-float-to-an-unsigned-int/
	*/
	R frac = static_cast<R>(static_cast<int>(std::round(number * (1 << F)))) & mask;

	return frac;
}

Cheers
Stefan

> > 
> > 	return frac;
> > }
> > 
> > 
> > template<unsigned int I, unsigned int F, typename R, typename T,
> > 	 std::enable_if_t<std::is_floating_point_v<R> &&
> > 			  std::is_integral_v<T>> * = nullptr>
> > constexpr R fixedToFloatingPoint(T number)
> > {
> > 	static_assert(I + F <= sizeof(T) * 8);
> > 
> > 	int coeff = number >> (I + F - 1) ? -1 : 1;
> > 
> > 	return coeff * static_cast<R>(number) / static_cast<R>(1 << F);
> > }
> > 
> > 
> > template<unsigned int I, unsigned int F, typename R, typename T,
> > 	 std::enable_if_t<std::is_floating_point_v<R> &&
> > 			  std::is_integral_v<T>> * = nullptr>
> > constexpr R fixedToFloatingPoint2(T number)
> > {
> >     static_assert(sizeof(int) >= sizeof(T));
> > 	static_assert(I + F <= sizeof(T) * 8);
> > 
> >         /* create a number with all upper bits set including the sign bit
> >          * of the fixed point number
> >          */	
> > 	int upper_ones = -1 << (I + F -1);
> > 	if(number & upper_ones) {
> > 	    return static_cast<R>( upper_ones | number ) / static_cast<R>(1 << F);
> > 	} else
> > 	    return static_cast<R>( number) / static_cast<R>(1 << F);
> > }
> 
> Actually this fails if number contains a one in the superfluous bits but
> is actually positive. I played a bit more (maybe we should have just
> copied an existing implementation :-) ):
> 
> template<unsigned int I, unsigned int F, typename R, typename T,
> 	 std::enable_if_t<std::is_floating_point_v<R> &&
> 			  std::is_integral_v<T>> * = nullptr>
> constexpr R fixedToFloatingPoint2(T number)
> {
>     static_assert(sizeof(int) >= sizeof(T));
> 	static_assert(I + F <= sizeof(T) * 8);
> 	
> 	/* recreate the upper bits in case of a negative number by shifting the sign 
> 	bit from the fixed point to the first bit of the unsigned and then right shifting 
> 	by the same amount which keeps the sign bit in place. 
> 	This can be optimized by the compiler quite well */
> 	int remaining_bits = sizeof(int)*8 - (I+F);
> 	int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
> 	return static_cast<R>(t) / static_cast<R>(1 << F);
> }
> 
> 
> The question if this is actally the fixed format used by the rkisp
> still needs to be proven though.
> 
> > 
> > void doTest(double x) {
> >     uint16_t f = floatingToFixedPoint<4,7,uint16_t>(x);
> >     uint16_t f2 = floatingToFixedPoint2<4,7,uint16_t>(x);
> >     double b = fixedToFloatingPoint<4,7,double>(f);
> >     double b2 = fixedToFloatingPoint2<4,7,double>(f2);
> >     std::cout << std::hex << "===== test " << x << std::endl;
> >     std::cout << "v1: " << f << "(" << std::bitset<16>(f) << ") reverse: " << b << std::endl;
> >     std::cout << "v2: " << f2 << "(" << std::bitset<16>(f2) << ") reverse: " << b2 << std::endl;
> > }
> > 
> > int main()
> > {
> >     doTest(-8.0);
> >     doTest(7.992);
> >     doTest(1.0);
> >     doTest(0.4);
> >     doTest(1.4);
> >     doTest(-0.4);
> >     doTest(-1.4);
> > } 
> > 
> > --- end of main.cpp
> > 
> > 
> > I couldn't come up with a simpler solution to handle the upper ones in
> > the fixed2FloatingPoint conversion. If we would have 16 or 32 bit wide
> > fixed points that wouldn't be an issue. Maybe someone else has an idea
> > here.
> > 
> > Best regards,
> > Stefan
> > 
> > > +
> > > +		int ret;
> > > +		for (const auto &testCase : testCases) {
> > > +			ret = testSingleFixedPoint<4, 7, uint16_t>(testCase.first,
> > > +								   testCase.second);
> > > +			if (ret != TestPass)
> > > +				return ret;
> > > +		}
> > > +
> > > +		return TestPass;
> > > +	}
> > > +
> > >  	int run()
> > >  	{
> > >  		/* utils::hex() test. */
> > > @@ -290,6 +335,10 @@ protected:
> > >  		if (testDuration() != TestPass)
> > >  			return TestFail;
> > >  
> > > +		/* fixed point conversion test */
> > > +		if (testFixedPoint() != TestPass)
> > > +			return TestFail;
> > > +
> > >  		return TestPass;
> > >  	}
> > >  };
> > > -- 
> > > 2.39.2
> > >