[PATCH v2] libcamera: utils: Add a helper to convert floating-point to fixed-point

Stefan Klug stefan.klug at ideasonboard.com
Thu Apr 25 11:09:10 CEST 2024


On Wed, Apr 24, 2024 at 08:17:30PM +0200, Stefan Klug wrote:
> Hi Paul,
> 
> thanks for the patch.
> 
> On Wed, Apr 24, 2024 at 04:44:09PM +0900, Paul Elder wrote:
> > Add helper functions for converting between floating point and fixed
> > point numbers. Also add tests for them.
> > 
> > Signed-off-by: Paul Elder <paul.elder at ideasonboard.com>
> > ---
> > Changes in v2:
> > - added the reverse conversion function (fixed -> floating)
> > - added tests
> > - make the conversion code cleaner
> > ---
> >  include/libcamera/base/utils.h | 37 +++++++++++++++++++++++++
> >  src/libcamera/base/utils.cpp   | 20 ++++++++++++++
> >  test/utils.cpp                 | 49 ++++++++++++++++++++++++++++++++++
> >  3 files changed, 106 insertions(+)
> > 
> > diff --git a/include/libcamera/base/utils.h b/include/libcamera/base/utils.h
> > index 37d9af60..da0767e3 100644
> > --- a/include/libcamera/base/utils.h
> > +++ b/include/libcamera/base/utils.h
> > @@ -9,6 +9,7 @@
> >  
> >  #include <algorithm>
> >  #include <chrono>
> > +#include <cmath>
> >  #include <iterator>
> >  #include <memory>
> >  #include <ostream>
> > @@ -369,6 +370,42 @@ decltype(auto) abs_diff(const T &a, const T &b)
> >  
> >  double strtod(const char *__restrict nptr, char **__restrict endptr);
> >  
> > +#ifndef __DOXYGEN__
> > +template<unsigned int I, unsigned int F, typename R, typename T,
> > +	 std::enable_if_t<std::is_integral_v<R> &&
> > +			  std::is_floating_point_v<T>> * = nullptr>
> > +#else
> > +template<unsigned int I, unsigned int F, typename R, typename T>
> > +#endif
> > +constexpr R floatingToFixedPoint(T number)
> > +{
> > +	static_assert(I + F <= sizeof(R) * 8);
> > +
> > +	R maskI = (1 << I) - 1;
> > +	R whole = (static_cast<R>(number) & maskI) << F;
> > +
> > +	R maskF = (1 << F) - 1;
> > +	R frac = static_cast<R>(std::round(number * (1 << F))) & maskF;
> > +
> > +	return whole | frac;
> > +}
> > +
> > +#ifndef __DOXYGEN__
> > +template<unsigned int I, unsigned int F, typename R, typename T,
> > +	 std::enable_if_t<std::is_floating_point_v<R> &&
> > +			  std::is_integral_v<T>> * = nullptr>
> > +#else
> > +template<unsigned int I, unsigned int F, typename R, typename T>
> > +#endif
> > +constexpr R fixedToFloatingPoint(T number)
> > +{
> > +	static_assert(I + F <= sizeof(T) * 8);
> > +
> > +	int coeff = number >> (I + F - 1) ? -1 : 1;
> > +
> > +	return coeff * static_cast<R>(number) / static_cast<R>(1 << F);
> > +}
> > +
> >  } /* namespace utils */
> >  
> >  #ifndef __DOXYGEN__
> > diff --git a/src/libcamera/base/utils.cpp b/src/libcamera/base/utils.cpp
> > index 3b73b442..ba36026d 100644
> > --- a/src/libcamera/base/utils.cpp
> > +++ b/src/libcamera/base/utils.cpp
> > @@ -521,6 +521,26 @@ double strtod(const char *__restrict nptr, char **__restrict endptr)
> >  #endif
> >  }
> >  
> > +/**
> > + * \fn R floatingToFixedPoint(T number)
> > + * \brief Convert a floating point number to a fixed-point representation
> > + * \tparam I Bit width of the integer part of the fixed-point
> > + * \tparam F Bit width of the fractional part of the fixed-point
> > + * \tparam R Return type of the fixed-point representation
> > + * \tparam T Input type of the floating point representation
> > + * \return The converted value
> > + */
> > +
> > +/**
> > + * \fn R fixedToFloatingPoint(T number)
> > + * \brief Convert a fixed-point number to a floating point representation
> > + * \tparam I Bit width of the integer part of the fixed-point
> > + * \tparam F Bit width of the fractional part of the fixed-point
> > + * \tparam R Return type of the floating point representation
> > + * \tparam T Input type of the fixed-point representation
> > + * \return The converted value
> > + */
> > +
> >  } /* namespace utils */
> >  
> >  #ifndef __DOXYGEN__
> > diff --git a/test/utils.cpp b/test/utils.cpp
> > index fc56e14e..f1805207 100644
> > --- a/test/utils.cpp
> > +++ b/test/utils.cpp
> > @@ -170,6 +170,51 @@ protected:
> >  		return TestPass;
> >  	}
> >  
> > +	template<unsigned int intPrec, unsigned fracPrec, typename T>
> > +	int testSingleFixedPoint(double input, T expected)
> > +	{
> > +		T ret = utils::floatingToFixedPoint<intPrec, fracPrec, T>(input);
> > +		if (ret != expected) {
> > +			cerr << "Expected " << input << " to convert to "
> > +			     << expected << ", got " << ret << std::endl;
> > +			return TestFail;
> > +		}
> > +
> > +		/*
> > +		 * The precision check is fairly arbitrary but is based on what
> > +		 * the rkisp1 is capable of in the crosstalk module.
> > +		 */
> > +		double f = utils::fixedToFloatingPoint<intPrec, fracPrec, double>(ret);
> > +		if (std::abs(f - input) > 0.001) {
> > +			cerr << "Reverse conversion expected " << ret
> > +			     << " to convert to " << input
> > +			     << ", got " << f << std::endl;
> > +			return TestFail;
> > +		}
> > +
> > +		return TestPass;
> > +	}
> > +
> > +	int testFixedPoint()
> > +	{
> > +		/* These are the only cases that we know for certain */
> > +		std::map<double, uint16_t> testCases = {
> > +			{ 7.992, 0x3FF },
> > +			{ -8, 0x400 },
> > +			{ 0, 0 },
> > +		};
> 
> Great to have a testcase :-) Unfortunately I still believe there is
> something not exactly right here. As the implementation differed from
> what I had in mind regarding fixed points I played a bit with it...  I
> guess these testvalues where taken from the rkisp header. They all work
> well with this implementation, but they miss the interesting point of
> negative values (-0.4, -1.4) where two's complement gets interesting. In
> that case the current implementation fails in the conversion back to
> float. I'm not sure if vsi implemented a special fixed point format but
> I actually don't expect that.
> 
> I did a testprogram that shows the differences:
> 
> ---------- main.cpp
> #include <stdio.h>
> #include <algorithm>
> #include <chrono>
> #include <cmath>
> #include <iterator>
> #include <memory>
> #include <ostream>
> #include <iostream>
> #include <type_traits>
> #include <bitset>
> 
> template<unsigned int I, unsigned int F, typename R, typename T,
> 	 std::enable_if_t<std::is_integral_v<R> &&
> 			  std::is_floating_point_v<T>> * = nullptr>
> constexpr R floatingToFixedPoint(T number)
> {
> 	static_assert(I + F <= sizeof(R) * 8);
> 
> 	R maskI = (1 << I) - 1;
> 	R whole = (static_cast<R>(number) & maskI) << F;
> 
> 	R maskF = (1 << F) - 1;
> 	R frac = static_cast<R>(std::round(number * (1 << F))) & maskF;
> 
> 	return whole | frac;
> }
> 
> template<unsigned int I, unsigned int F, typename R, typename T,
> 	 std::enable_if_t<std::is_integral_v<R> &&
> 			  std::is_floating_point_v<T>> * = nullptr>
> constexpr R floatingToFixedPoint2(T number)
> {
> 	static_assert(I + F <= sizeof(R) * 8);
> 
> 	R mask = (1 << (F + I)) - 1;
> 	R frac = static_cast<R>(std::round(number * (1 << (F)))) & mask;
> 
> 	return frac;
> }
> 
> 
> template<unsigned int I, unsigned int F, typename R, typename T,
> 	 std::enable_if_t<std::is_floating_point_v<R> &&
> 			  std::is_integral_v<T>> * = nullptr>
> constexpr R fixedToFloatingPoint(T number)
> {
> 	static_assert(I + F <= sizeof(T) * 8);
> 
> 	int coeff = number >> (I + F - 1) ? -1 : 1;
> 
> 	return coeff * static_cast<R>(number) / static_cast<R>(1 << F);
> }
> 
> 
> template<unsigned int I, unsigned int F, typename R, typename T,
> 	 std::enable_if_t<std::is_floating_point_v<R> &&
> 			  std::is_integral_v<T>> * = nullptr>
> constexpr R fixedToFloatingPoint2(T number)
> {
>     static_assert(sizeof(int) >= sizeof(T));
> 	static_assert(I + F <= sizeof(T) * 8);
> 
>         /* create a number with all upper bits set including the sign bit
>          * of the fixed point number
>          */	
> 	int upper_ones = -1 << (I + F -1);
> 	if(number & upper_ones) {
> 	    return static_cast<R>( upper_ones | number ) / static_cast<R>(1 << F);
> 	} else
> 	    return static_cast<R>( number) / static_cast<R>(1 << F);
> }

Actually this fails if number contains a one in the superfluous bits but
is actually positive. I played a bit more (maybe we should have just
copied an existing implementation :-) ):

template<unsigned int I, unsigned int F, typename R, typename T,
	 std::enable_if_t<std::is_floating_point_v<R> &&
			  std::is_integral_v<T>> * = nullptr>
constexpr R fixedToFloatingPoint2(T number)
{
    static_assert(sizeof(int) >= sizeof(T));
	static_assert(I + F <= sizeof(T) * 8);
	
	/* recreate the upper bits in case of a negative number by shifting the sign 
	bit from the fixed point to the first bit of the unsigned and then right shifting 
	by the same amount which keeps the sign bit in place. 
	This can be optimized by the compiler quite well */
	int remaining_bits = sizeof(int)*8 - (I+F);
	int t = static_cast<int>(static_cast<unsigned>(number) << remaining_bits) >> remaining_bits;
	return static_cast<R>(t) / static_cast<R>(1 << F);
}


The question if this is actally the fixed format used by the rkisp
still needs to be proven though.

> 
> void doTest(double x) {
>     uint16_t f = floatingToFixedPoint<4,7,uint16_t>(x);
>     uint16_t f2 = floatingToFixedPoint2<4,7,uint16_t>(x);
>     double b = fixedToFloatingPoint<4,7,double>(f);
>     double b2 = fixedToFloatingPoint2<4,7,double>(f2);
>     std::cout << std::hex << "===== test " << x << std::endl;
>     std::cout << "v1: " << f << "(" << std::bitset<16>(f) << ") reverse: " << b << std::endl;
>     std::cout << "v2: " << f2 << "(" << std::bitset<16>(f2) << ") reverse: " << b2 << std::endl;
> }
> 
> int main()
> {
>     doTest(-8.0);
>     doTest(7.992);
>     doTest(1.0);
>     doTest(0.4);
>     doTest(1.4);
>     doTest(-0.4);
>     doTest(-1.4);
> } 
> 
> --- end of main.cpp
> 
> 
> I couldn't come up with a simpler solution to handle the upper ones in
> the fixed2FloatingPoint conversion. If we would have 16 or 32 bit wide
> fixed points that wouldn't be an issue. Maybe someone else has an idea
> here.
> 
> Best regards,
> Stefan
> 
> > +
> > +		int ret;
> > +		for (const auto &testCase : testCases) {
> > +			ret = testSingleFixedPoint<4, 7, uint16_t>(testCase.first,
> > +								   testCase.second);
> > +			if (ret != TestPass)
> > +				return ret;
> > +		}
> > +
> > +		return TestPass;
> > +	}
> > +
> >  	int run()
> >  	{
> >  		/* utils::hex() test. */
> > @@ -290,6 +335,10 @@ protected:
> >  		if (testDuration() != TestPass)
> >  			return TestFail;
> >  
> > +		/* fixed point conversion test */
> > +		if (testFixedPoint() != TestPass)
> > +			return TestFail;
> > +
> >  		return TestPass;
> >  	}
> >  };
> > -- 
> > 2.39.2
> > 


More information about the libcamera-devel mailing list