[libcamera-devel] [PATCH 2/2] android: CameraDevice: Fix Camera3RequestDescriptor leakage

Laurent Pinchart laurent.pinchart at ideasonboard.com
Thu Mar 25 18:21:27 CET 2021


Hi Hiro,

Thank you for the patch, it's nice to see the HAL being cleaned up :-)

On Thu, Mar 25, 2021 at 05:02:43PM +0100, Jacopo Mondi wrote:
> On Thu, Mar 25, 2021 at 08:13:57PM +0900, Hirokazu Honda wrote:
> > CameraDevice creates Camera3RequestDescriptor in
> > processCaptureRequest() and disallocates in requestComplete().
> > Camera3RequestDescriptor can never be destroyed if
> > requestComplete() is never called. This avoid the memory
> > leakage by storing them in map CamerarRequestDescriptor.

s/CamerarRequestDescriptor/CameraRequestDescriptor/

When does this happen ? You've reported issues with the IPU3 pipeline
handler failing to queue a request due to fact that it has no internal
queue of requests waiting for hardware resources to be available. Is
that what you're addressing here, or is there something else ? The
libcamera core should guarantee that requestComplete() gets called for
all requests successfully queued, so if there are other leakages, I'd
like to investigate them.

> Very nice, I'm not particularly proud of this part of the
> implementation as it is quite hard to follow, this makes it easier
> indeed
> 
> One question below
> 
> >
> > Signed-off-by: Hirokazu Honda <hiroh at chromium.org>
> > ---
> >  src/android/camera_device.cpp | 67 +++++++++++++++++++----------------
> >  src/android/camera_device.h   |  6 +++-
> >  src/android/camera_worker.cpp |  4 +--
> >  src/android/camera_worker.h   |  2 +-
> >  4 files changed, 44 insertions(+), 35 deletions(-)
> >
> > diff --git a/src/android/camera_device.cpp b/src/android/camera_device.cpp
> > index ae693664..50b017a3 100644
> > --- a/src/android/camera_device.cpp
> > +++ b/src/android/camera_device.cpp
> > @@ -287,15 +287,17 @@ CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor(
> >
> >  	/*
> >  	 * Create the libcamera::Request unique_ptr<> to tie its lifetime
> > -	 * to the descriptor's one. Set the descriptor's address as the
> > -	 * request's cookie to retrieve it at completion time.
> > +	 * to the descriptor's one.
> >  	 */
> > -	request_ = std::make_unique<CaptureRequest>(camera,
> > -						    reinterpret_cast<uint64_t>(this));
> > +	request_ = std::make_unique<CaptureRequest>(camera);
> >  }
> >
> > +CameraDevice::Camera3RequestDescriptor::Camera3RequestDescriptor() = default;
> >  CameraDevice::Camera3RequestDescriptor::~Camera3RequestDescriptor() = default;
> >
> > +CameraDevice::Camera3RequestDescriptor&
> > +CameraDevice::Camera3RequestDescriptor::operator=(Camera3RequestDescriptor&&) = default;
> > +
> >  /*
> >   * \class CameraDevice
> >   *
> > @@ -1811,8 +1813,8 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	 * The descriptor and the associated memory reserved here are freed
> >  	 * at request complete time.
> >  	 */
> > -	Camera3RequestDescriptor *descriptor =
> > -		new Camera3RequestDescriptor(camera_.get(), camera3Request);
> > +	Camera3RequestDescriptor descriptor(camera_.get(), camera3Request);
> > +
> >  	/*
> >  	 * \todo The Android request model is incremental, settings passed in
> >  	 * previous requests are to be effective until overridden explicitly in
> > @@ -1822,12 +1824,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	if (camera3Request->settings)
> >  		lastSettings_ = camera3Request->settings;
> >  	else
> > -		descriptor->settings_ = lastSettings_;
> > +		descriptor.settings_ = lastSettings_;
> >
> > -	LOG(HAL, Debug) << "Queueing request " << descriptor->request_->cookie()
> > -			<< " with " << descriptor->buffers_.size() << " streams";
> > -	for (unsigned int i = 0; i < descriptor->buffers_.size(); ++i) {
> > -		const camera3_stream_buffer_t &camera3Buffer = descriptor->buffers_[i];
> > +	LOG(HAL, Debug) << "Queueing request " << descriptor.request_->cookie()
> > +			<< " with " << descriptor.buffers_.size() << " streams";
> > +	for (unsigned int i = 0; i < descriptor.buffers_.size(); ++i) {
> > +		const camera3_stream_buffer_t &camera3Buffer = descriptor.buffers_[i];
> >  		camera3_stream *camera3Stream = camera3Buffer.stream;
> >  		CameraStream *cameraStream = static_cast<CameraStream *>(camera3Stream->priv);
> >
> > @@ -1860,7 +1862,7 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  			 * lifetime management only.
> >  			 */
> >  			buffer = createFrameBuffer(*camera3Buffer.buffer);
> > -			descriptor->frameBuffers_.emplace_back(buffer);
> > +			descriptor.frameBuffers_.emplace_back(buffer);
> >  			LOG(HAL, Debug) << ss.str() << " (direct)";
> >  			break;
> >
> > @@ -1879,11 +1881,10 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >
> >  		if (!buffer) {
> >  			LOG(HAL, Error) << "Failed to create buffer";
> > -			delete descriptor;
> >  			return -ENOMEM;
> >  		}
> >
> > -		descriptor->request_->addBuffer(cameraStream->stream(), buffer,
> > +		descriptor.request_->addBuffer(cameraStream->stream(), buffer,
> >  						camera3Buffer.acquire_fence);
> >  	}
> >
> > @@ -1891,11 +1892,12 @@ int CameraDevice::processCaptureRequest(camera3_capture_request_t *camera3Reques
> >  	 * Translate controls from Android to libcamera and queue the request
> >  	 * to the CameraWorker thread.
> >  	 */
> > -	int ret = processControls(descriptor);
> > +	int ret = processControls(&descriptor);
> >  	if (ret)
> >  		return ret;
> >
> > -	worker_.queueRequest(descriptor->request_.get());
> > +	worker_.queueRequest(descriptor.request_.get());
> > +	descriptors_[descriptor.request_->cookie()] = std::move(descriptor);
> >
> >  	return 0;
> >  }
> > @@ -1905,8 +1907,13 @@ void CameraDevice::requestComplete(Request *request)
> >  	const Request::BufferMap &buffers = request->buffers();
> >  	camera3_buffer_status status = CAMERA3_BUFFER_STATUS_OK;
> >  	std::unique_ptr<CameraMetadata> resultMetadata;
> > -	Camera3RequestDescriptor *descriptor =
> > -		reinterpret_cast<Camera3RequestDescriptor *>(request->cookie());
> > +	auto node = descriptors_.extract(request->cookie());
> 
> So once node goes out of scope the mapped descriptor gets deleted.
> Very nice.
> 
> However, shouldn't descriptors_ be cleared at stop() time or when a
> new stream configuration is requested ?

I'm wondering if we shouldn't go one step forward. I'll assume here that
this patch is dealing with failures to queue requests when calling
Camera::queueRequest(). If there are other issues, the comments below
may not apply to them.

When queuing a request to libcamera, we call
CameraWorker::queueRequest(). It's a void function that invokes
Worker::processRequest() as a queued call, so it returns immediately,
and we can't know if Worker::processRequest() will fail or not. We have
thus sent the request onto a journey, hoping it will be successfully
queued. There can be multiple reasons that won't be the case:

- CameraWorker::Worker::processRequest() failing to wait for a fence.
  Oops, shouldn't happen, but we still need to handle this gracefully.
  We're still in the HAL here, the request hasn't reached libcamera. The
  HAL should thus handle the failure. This is case number 1.

- CameraWorker::Worker::processRequest() calls CaptureRequest::queue(),
  which calls Camera::queueRequest(). That function can return an error
  if some of the sanity checks fail. We've reached libcamera, but it has
  failed synchronously, so the HAL should handle the failure. Still case
  number 1.

- At this point, the request is sent to PipelineHandler::queueRequest(),
  asynchronously again. That function doesn't perform any sanity check
  (but it could in the future), and queues the request to the pipeline
  handler with a call to PipelineHandler:queueRequestDevice(),
  synchronously. This call can fail. If the future sanity checks in
  PipelineHandler::queueRequest() or
  PipelineHandler:queueRequestDevice() fail, the request belongs to the
  libcamera core (the HAL has queued it successfully, and the pipeline
  handler hasn't accepted it). This is case number 2, and I believe this
  is the one that has triggered this patch.

- The pipeline handler has accepted the request, but something fails
  later. That's case number 3.

Now let's look at those three cases, in reverse order.

3. The request belongs to the pipeline handler, which must complete it.
We have support in the PipelineHandler base class to accept completion
of requests out of sequence, and reorder the completion events to the
HAL to guarantee they will be issued in sequence. The pipeline handler
can thus complete the request with an error state as soon as it detects
the error, and the HAL will eventually receive a requestComplete() call.
As far as I can tell, this is working correctly.

2. This is embarassing, we don't handle this at all.
PipelineHandler::queueRequest() returns an error code, but it never
reaches Camera::queueReuest() as the call to
PipelineHandler::queueRequest() is asynchronous. We should make
PipelineHandler::queueRequest() a void function to be less misleading,
and PipelineHandler::queueRequest() needs to signal request completion
in that case. I think we can reuse the same request completion ordering
mechanism and immediately mark the request as complete (with an error)
in PipelineHandler::queueRequest() in that case. It should be an easy
fix (famous last words... :-)).

1. This is the responsibility of the HAL, which should signal request
completion to the camera service. Depending on whether the camera
service can support out-of-order completion or not, we can signal
completion immediately, or need to keep the request in a queue and
complete it at the appropriate time.

As far as I understand, this patch is meant to address the memory leak
related to case 2, and also addresses as a consequence the leak related
to case 1 (case 3 should be working correctly already, if I'm not
mistaken). For case 2 I think the correct fix should be to ensure that
libcamera will complete the request, as explained above. For case 1, we
need a specific fix in the HAL, not only for the memory leak, but to
also complete the request towards the camera service.

Is this analysis correct, or am I missing something ?

> > +	if (node.empty()) {
> > +		LOG(HAL, Error) << "Unknown request: " << request->cookie();
> > +		status = CAMERA3_BUFFER_STATUS_ERROR;
> > +		return;
> > +	}
> > +	Camera3RequestDescriptor& descriptor = node.mapped();
> >
> >  	if (request->status() != Request::RequestComplete) {
> >  		LOG(HAL, Error) << "Request not successfully completed: "
> > @@ -1915,7 +1922,7 @@ void CameraDevice::requestComplete(Request *request)
> >  	}
> >
> >  	LOG(HAL, Debug) << "Request " << request->cookie() << " completed with "
> > -			<< descriptor->buffers_.size() << " streams";
> > +			<< descriptor.buffers_.size() << " streams";
> >
> >  	/*
> >  	 * \todo The timestamp used for the metadata is currently always taken
> > @@ -1924,10 +1931,10 @@ void CameraDevice::requestComplete(Request *request)
> >  	 * pipeline handlers) timestamp in the Request itself.
> >  	 */
> >  	uint64_t timestamp = buffers.begin()->second->metadata().timestamp;
> > -	resultMetadata = getResultMetadata(*descriptor, timestamp);
> > +	resultMetadata = getResultMetadata(descriptor, timestamp);
> >
> >  	/* Handle any JPEG compression. */
> > -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> >  		CameraStream *cameraStream =
> >  			static_cast<CameraStream *>(buffer.stream->priv);
> >
> > @@ -1942,7 +1949,7 @@ void CameraDevice::requestComplete(Request *request)
> >
> >  		int ret = cameraStream->process(*src,
> >  						*buffer.buffer,
> > -						descriptor->settings_,
> > +						descriptor.settings_,
> >  						resultMetadata.get());
> >  		if (ret) {
> >  			status = CAMERA3_BUFFER_STATUS_ERROR;
> > @@ -1959,17 +1966,17 @@ void CameraDevice::requestComplete(Request *request)
> >
> >  	/* Prepare to call back the Android camera stack. */
> >  	camera3_capture_result_t captureResult = {};
> > -	captureResult.frame_number = descriptor->frameNumber_;
> > -	captureResult.num_output_buffers = descriptor->buffers_.size();
> > -	for (camera3_stream_buffer_t &buffer : descriptor->buffers_) {
> > +	captureResult.frame_number = descriptor.frameNumber_;
> > +	captureResult.num_output_buffers = descriptor.buffers_.size();
> > +	for (camera3_stream_buffer_t &buffer : descriptor.buffers_) {
> >  		buffer.acquire_fence = -1;
> >  		buffer.release_fence = -1;
> >  		buffer.status = status;
> >  	}
> > -	captureResult.output_buffers = descriptor->buffers_.data();
> > +	captureResult.output_buffers = descriptor.buffers_.data();
> >
> >  	if (status == CAMERA3_BUFFER_STATUS_OK) {
> > -		notifyShutter(descriptor->frameNumber_, timestamp);
> > +		notifyShutter(descriptor.frameNumber_, timestamp);
> >
> >  		captureResult.partial_result = 1;
> >  		captureResult.result = resultMetadata->get();
> > @@ -1982,13 +1989,11 @@ void CameraDevice::requestComplete(Request *request)
> >  		 * is here signalled. Make sure the error path plays well with
> >  		 * the camera stack state machine.
> >  		 */
> > -		notifyError(descriptor->frameNumber_,
> > -			    descriptor->buffers_[0].stream);
> > +		notifyError(descriptor.frameNumber_,
> > +			    descriptor.buffers_[0].stream);
> >  	}
> >
> >  	callbacks_->process_capture_result(callbacks_, &captureResult);
> > -
> > -	delete descriptor;
> >  }
> >
> >  std::string CameraDevice::logPrefix() const
> > diff --git a/src/android/camera_device.h b/src/android/camera_device.h
> > index 11bdfec8..54d7c319 100644
> > --- a/src/android/camera_device.h
> > +++ b/src/android/camera_device.h
> > @@ -69,9 +69,11 @@ private:
> >  	CameraDevice(unsigned int id, std::shared_ptr<libcamera::Camera> camera);
> >
> >  	struct Camera3RequestDescriptor {
> > +		Camera3RequestDescriptor();
> > +		~Camera3RequestDescriptor();
> >  		Camera3RequestDescriptor(libcamera::Camera *camera,
> >  					 const camera3_capture_request_t *camera3Request);
> > -		~Camera3RequestDescriptor();
> > +		Camera3RequestDescriptor& operator=(Camera3RequestDescriptor&&);
> >
> >  		uint32_t frameNumber_;
> >  		std::vector<camera3_stream_buffer_t> buffers_;
> > @@ -122,6 +124,8 @@ private:
> >  	std::map<int, libcamera::PixelFormat> formatsMap_;
> >  	std::vector<CameraStream> streams_;
> >
> > +	std::map<uint64_t, Camera3RequestDescriptor> descriptors_;
> > +
> >  	std::string maker_;
> >  	std::string model_;
> >
> > diff --git a/src/android/camera_worker.cpp b/src/android/camera_worker.cpp
> > index df436460..300ddde0 100644
> > --- a/src/android/camera_worker.cpp
> > +++ b/src/android/camera_worker.cpp
> > @@ -26,10 +26,10 @@ LOG_DECLARE_CATEGORY(HAL)
> >   * by the CameraWorker which queues it to the libcamera::Camera after handling
> >   * fences.
> >   */
> > -CaptureRequest::CaptureRequest(libcamera::Camera *camera, uint64_t cookie)
> > +CaptureRequest::CaptureRequest(libcamera::Camera *camera)
> >  	: camera_(camera)
> >  {
> > -	request_ = camera_->createRequest(cookie);
> > +	request_ = camera_->createRequest(reinterpret_cast<uint64_t>(this));
> >  }
> >
> >  void CaptureRequest::addBuffer(Stream *stream, FrameBuffer *buffer, int fence)
> > diff --git a/src/android/camera_worker.h b/src/android/camera_worker.h
> > index d7060576..64b1658b 100644
> > --- a/src/android/camera_worker.h
> > +++ b/src/android/camera_worker.h
> > @@ -22,7 +22,7 @@ class CameraDevice;
> >  class CaptureRequest
> >  {
> >  public:
> > -	CaptureRequest(libcamera::Camera *camera, uint64_t cookie);
> > +	CaptureRequest(libcamera::Camera *camera);
> >
> >  	const std::vector<int> &fences() const { return acquireFences_; }
> >  	libcamera::ControlList &controls() { return request_->controls(); }

-- 
Regards,

Laurent Pinchart


More information about the libcamera-devel mailing list