[libcamera-devel] [PATCH v2 1/9] android: jpeg: exif: Expand setString to support different encodings

Laurent Pinchart laurent.pinchart at ideasonboard.com
Thu Jan 21 20:35:55 CET 2021


Hi Paul,

Thank you for the patch.

On Thu, Jan 21, 2021 at 07:15:41PM +0900, Paul Elder wrote:
> GPSProcessingMethod and UserComment in EXIF tags can be in UTF-16.
> Expand setString to take an encoding when the field type is undefined.
> Update callers accordingly.
> 
> Signed-off-by: Paul Elder <paul.elder at ideasonboard.com>
> 
> ---
> Changes in v2:
> - moved from utils into exif
> - support no-encoding
> ---
>  src/android/jpeg/exif.cpp | 107 +++++++++++++++++++++++++++++++-------
>  src/android/jpeg/exif.h   |  12 ++++-
>  2 files changed, 99 insertions(+), 20 deletions(-)
> 
> diff --git a/src/android/jpeg/exif.cpp b/src/android/jpeg/exif.cpp
> index 33b3fa7f..cff366a4 100644
> --- a/src/android/jpeg/exif.cpp
> +++ b/src/android/jpeg/exif.cpp
> @@ -7,6 +7,9 @@
>  
>  #include "exif.h"
>  
> +#include <map>
> +#include <uchar.h>
> +
>  #include "libcamera/internal/log.h"
>  #include "libcamera/internal/utils.h"
>  
> @@ -64,7 +67,7 @@ Exif::Exif()
>  	exif_data_set_byte_order(data_, order_);
>  
>  	setString(EXIF_IFD_EXIF, EXIF_TAG_EXIF_VERSION,
> -		  EXIF_FORMAT_UNDEFINED, "0231");
> +		  EXIF_FORMAT_UNDEFINED, NoEncoding, "0231");
>  
>  	/* Create the mandatory EXIF fields with default data. */
>  	exif_data_fix(data_);
> @@ -178,10 +181,18 @@ void Exif::setRational(ExifIfd ifd, ExifTag tag, ExifRational item)
>  	exif_entry_unref(entry);
>  }
>  
> -void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::string &item)
> +static const std::map<Exif::StringEncoding, std::vector<uint8_t>> StringEncodingCodes = {

static const std::map<Exif::StringEncoding, std::array<uint8_t, 8>> StringEncodingCodes

would be more efficient (if it doesn't cause too many issues in the code
below).

And s/StringEncodingCodes/stringEncodingCodes/

> +	{ Exif::ASCII,     { 0x41, 0x53, 0x43, 0x49, 0x49, 0x00, 0x00, 0x00 } },
> +	{ Exif::JIS,       { 0x4a, 0x49, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00 } },
> +	{ Exif::Unicode,   { 0x55, 0x4e, 0x49, 0x43, 0x4f, 0x44, 0x45, 0x00 } },
> +	{ Exif::Undefined, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },

Should we drop JIS and Undefined ? It would be different if we were
parsing EXIF data, for as an EXIF generator, I doubt we'll ever generate
a string in any of those encodings.

> +};
> +
> +void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> +		     StringEncoding enc, const std::string &item)

s/enc/encoding/ ?

>  {
>  	std::string ascii;
> -	size_t length;
> +	size_t length = 0;
>  	const char *str;
>  
>  	if (format == EXIF_FORMAT_ASCII) {
> @@ -190,14 +201,44 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
>  
>  		/* Pad 1 extra byte to null-terminate the ASCII string. */
>  		length = ascii.length() + 1;
> -	} else {
> -		str = item.c_str();
> -
> -		/*
> -		 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
> -		 * are not null-terminated.
> -		 */
> -		length = item.length();
> +	} else if (format == EXIF_FORMAT_UNDEFINED) {

Any reason to replace else with else if ? If you keep a plain else, you
won't have to initialize length to 0.

> +		std::vector<uint8_t> buf;
> +		const std::vector<uint8_t> &constbuf = buf;

I don't think you need constbuf.

You can move the encoding lookup out of the switch:

		auto encodingString = stringEncodingCodes.find(enc);
		if (encodingString != stringEncodingCodes.end())
			buf = {
				encodingString->second.begin(),
				encodingString->second.end()
			};

(This handles the conversion from std::array to std::vector)

> +		std::u16string u16str;
> +
> +		switch (enc) {
> +		case Unicode:
> +			buf = StringEncodingCodes.find(enc)->second;
> +			u16str = utf8ToUtf16(item);
> +
> +			/* Little-endian */

Should this depend on the endianness of the EXIF ?

> +			buf.resize(8 + u16str.size() * 2);
> +			for (size_t i = 0; i < u16str.size(); i++) {

			for (char16_t c : u16str) {

> +				buf[8 + 2 * i] = (u16str[i] & 0xff);
> +				buf[8 + 2 * i + 1] = ((u16str[i] >> 8) & 0xff);
> +			}
> +
> +			str = reinterpret_cast<const char *>(constbuf.data());
> +			length = buf.size();
> +
> +			break;
> +		/* \todo Support JIS */
> +		case JIS:
> +		case ASCII:
> +		case Undefined:
> +			buf = StringEncodingCodes.find(enc)->second;
> +			[[fallthrough]];

		case NoEncoding:

and you should be able to drop the default case.

> +		default:
> +			buf.insert(buf.end(), item.begin(), item.end());
> +			str = reinterpret_cast<const char *>(constbuf.data());
> +
> +			/*
> +			 * Strings stored in different formats (EXIF_FORMAT_UNDEFINED)
> +			 * are not null-terminated.
> +			 */
> +			length = buf.size();
> +			break;
> +		}

Once you exit this scope, buf will be destroyed, and str becomes an
invalid pointer. You need to declare buf in the top level scope.

>  	}
>  
>  	ExifEntry *entry = createEntry(ifd, tag, format, length, length);
> @@ -210,12 +251,12 @@ void Exif::setString(ExifIfd ifd, ExifTag tag, ExifFormat format, const std::str
>  
>  void Exif::setMake(const std::string &make)
>  {
> -	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, make);
> +	setString(EXIF_IFD_0, EXIF_TAG_MAKE, EXIF_FORMAT_ASCII, NoEncoding, make);
>  }
>  
>  void Exif::setModel(const std::string &model)
>  {
> -	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, model);
> +	setString(EXIF_IFD_0, EXIF_TAG_MODEL, EXIF_FORMAT_ASCII, NoEncoding, model);
>  }
>  
>  void Exif::setSize(const Size &size)
> @@ -233,9 +274,9 @@ void Exif::setTimestamp(time_t timestamp)
>  	strftime(str, sizeof(str), "%Y:%m:%d %H:%M:%S", &tm);
>  	std::string ts(str);
>  
> -	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, ts);
> -	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, ts);
> -	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, ts);
> +	setString(EXIF_IFD_0, EXIF_TAG_DATE_TIME, EXIF_FORMAT_ASCII, NoEncoding, ts);
> +	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_ORIGINAL, EXIF_FORMAT_ASCII, NoEncoding, ts);
> +	setString(EXIF_IFD_EXIF, EXIF_TAG_DATE_TIME_DIGITIZED, EXIF_FORMAT_ASCII, NoEncoding, ts);
>  
>  	/* Query and set timezone information if available. */
>  	int r = strftime(str, sizeof(str), "%z", &tm);
> @@ -244,13 +285,13 @@ void Exif::setTimestamp(time_t timestamp)
>  		tz.insert(3, 1, ':');
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_ORIGINAL),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  		setString(EXIF_IFD_EXIF,
>  			  static_cast<ExifTag>(_ExifTag::OFFSET_TIME_DIGITIZED),
> -			  EXIF_FORMAT_ASCII, tz);
> +			  EXIF_FORMAT_ASCII, NoEncoding, tz);
>  	}
>  }
>  
> @@ -290,6 +331,34 @@ void Exif::setThumbnail(Span<const unsigned char> thumbnail,
>  	setShort(EXIF_IFD_0, EXIF_TAG_COMPRESSION, compression);
>  }
>  
> +/**
> + * \brief Convert UTF-8 string to UTF-16 string
> + * \param[in] str String to convert
> + *
> + * \return \a str in UTF-16
> + */
> +std::u16string Exif::utf8ToUtf16(const std::string &str)
> +{
> +	std::mbstate_t state{};

This should be mbstate_t as you're including uchar.h.

> +	char16_t c16;
> +	const char *ptr = str.data();
> +	const char *end = ptr + str.size();
> +
> +	std::u16string ret;
> +	while (size_t rc = mbrtoc16(&c16, ptr, end - ptr + 1, &state)) {
> +		if (rc == static_cast<size_t>(-2) ||
> +		    rc == static_cast<size_t>(-1))
> +			break;
> +
> +		ret.push_back(c16);
> +
> +		if (rc > 0)
> +			ptr += rc;
> +	}
> +
> +	return ret;
> +}
> +
>  [[nodiscard]] int Exif::generate()
>  {
>  	if (exifData_) {
> diff --git a/src/android/jpeg/exif.h b/src/android/jpeg/exif.h
> index 5cab4559..db98ba63 100644
> --- a/src/android/jpeg/exif.h
> +++ b/src/android/jpeg/exif.h
> @@ -26,6 +26,14 @@ public:
>  		JPEG = 6,
>  	};
>  
> +	enum StringEncoding {
> +		NoEncoding = 0,
> +		ASCII = 1,
> +		JIS = 2,
> +		Unicode = 3,
> +		Undefined = 4,
> +	};
> +
>  	void setMake(const std::string &make);
>  	void setModel(const std::string &model);
>  
> @@ -46,9 +54,11 @@ private:
>  	void setShort(ExifIfd ifd, ExifTag tag, uint16_t item);
>  	void setLong(ExifIfd ifd, ExifTag tag, uint32_t item);
>  	void setString(ExifIfd ifd, ExifTag tag, ExifFormat format,
> -		       const std::string &item);
> +		       StringEncoding enc, const std::string &item);

Would it make sense to move the encoding parameter last, to default it
to NoEncoding ? That way most of the callers (and in particular the ones
that pass EXIF_FORMAT_ASCII for format) wouldn't need to set enc to
NoEncoding explicitly.

>  	void setRational(ExifIfd ifd, ExifTag tag, ExifRational item);
>  
> +	std::u16string utf8ToUtf16(const std::string &str);
> +
>  	bool valid_;
>  
>  	ExifData *data_;

-- 
Regards,

Laurent Pinchart


More information about the libcamera-devel mailing list