<div dir="ltr"><div dir="ltr">Hi Laurent and Nicolas,<div><br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, 2 Aug 2021 at 00:43, Laurent Pinchart <<a href="mailto:laurent.pinchart@ideasonboard.com">laurent.pinchart@ideasonboard.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Hi Nícolas,<br>
<br>
Thank you for the patch.<br>
<br>
On Thu, Jul 22, 2021 at 08:28:49PM -0300, Nícolas F. R. A. Prado wrote:<br>
> Pipelines have relied on bufferCount to decide on the number of buffers<br>
> to allocate internally through allocateBuffers() and on the number of<br>
> V4L2 buffer slots to reserve through importBuffers(). Instead, the<br>
> number of internal buffers should be the minimum required by the<br>
> algorithms to avoid wasting memory, and the number of V4L2 buffer slots<br>
> should overallocate to avoid thrashing dmabuf mappings.<br>
> <br>
> For now, just set them to constants and stop relying on bufferCount, to<br>
> allow for its removal.<br>
> <br>
> Signed-off-by: Nícolas F. R. A. Prado <<a href="mailto:nfraprado@collabora.com" target="_blank">nfraprado@collabora.com</a>><br>
> ---<br>
> <br>
> No changes in v7<br>
> <br>
> Changes in v6:<br>
> - Added pipeline name as prefix to each BUFFER_SLOT_COUNT and<br>
> INTERNAL_BUFFER_COUNT constant<br>
> <br>
> src/libcamera/pipeline/ipu3/imgu.cpp | 12 ++++++------<br>
> src/libcamera/pipeline/ipu3/imgu.h | 5 ++++-<br>
> src/libcamera/pipeline/ipu3/ipu3.cpp | 9 +--------<br>
> .../pipeline/raspberrypi/raspberrypi.cpp | 15 +++++----------<br>
> src/libcamera/pipeline/rkisp1/rkisp1.cpp | 9 ++-------<br>
> src/libcamera/pipeline/rkisp1/rkisp1_path.cpp | 2 +-<br>
> src/libcamera/pipeline/rkisp1/rkisp1_path.h | 3 +++<br>
> src/libcamera/pipeline/simple/converter.cpp | 4 ++--<br>
> src/libcamera/pipeline/simple/converter.h | 3 +++<br>
> src/libcamera/pipeline/simple/simple.cpp | 6 ++----<br>
> src/libcamera/pipeline/uvcvideo/uvcvideo.cpp | 5 +++--<br>
> src/libcamera/pipeline/vimc/vimc.cpp | 5 +++--<br>
> 12 files changed, 35 insertions(+), 43 deletions(-)<br>
<br>
Given that some of the pipeline handlers will need more intrusive<br>
changes to address the comments below, you could split this with one<br>
patch per pipeline handler (or perhaps grouping the easy ones together).<br>
<br>
> <br>
> diff --git a/src/libcamera/pipeline/ipu3/imgu.cpp b/src/libcamera/pipeline/ipu3/imgu.cpp<br>
> index e955bc3456ba..f36e99dacbe7 100644<br>
> --- a/src/libcamera/pipeline/ipu3/imgu.cpp<br>
> +++ b/src/libcamera/pipeline/ipu3/imgu.cpp<br>
> @@ -593,22 +593,22 @@ int ImgUDevice::configureVideoDevice(V4L2VideoDevice *dev, unsigned int pad,<br>
> /**<br>
> * \brief Allocate buffers for all the ImgU video devices<br>
> */<br>
> -int ImgUDevice::allocateBuffers(unsigned int bufferCount)<br>
> +int ImgUDevice::allocateBuffers()<br>
> {<br>
> /* Share buffers between CIO2 output and ImgU input. */<br>
> - int ret = input_->importBuffers(bufferCount);<br>
> + int ret = input_->importBuffers(IPU3_BUFFER_SLOT_COUNT);<br>
> if (ret) {<br>
> LOG(IPU3, Error) << "Failed to import ImgU input buffers";<br>
> return ret;<br>
> }<br>
> <br>
> - ret = param_->allocateBuffers(bufferCount, ¶mBuffers_);<br>
> + ret = param_->allocateBuffers(IPU3_INTERNAL_BUFFER_COUNT, ¶mBuffers_);<br>
> if (ret < 0) {<br>
> LOG(IPU3, Error) << "Failed to allocate ImgU param buffers";<br>
> goto error;<br>
> }<br>
> <br>
> - ret = stat_->allocateBuffers(bufferCount, &statBuffers_);<br>
> + ret = stat_->allocateBuffers(IPU3_INTERNAL_BUFFER_COUNT, &statBuffers_);<br>
> if (ret < 0) {<br>
> LOG(IPU3, Error) << "Failed to allocate ImgU stat buffers";<br>
> goto error;<br>
> @@ -619,13 +619,13 @@ int ImgUDevice::allocateBuffers(unsigned int bufferCount)<br>
> * corresponding stream is active or inactive, as the driver needs<br>
> * buffers to be requested on the V4L2 devices in order to operate.<br>
> */<br>
> - ret = output_->importBuffers(bufferCount);<br>
> + ret = output_->importBuffers(IPU3_BUFFER_SLOT_COUNT);<br>
> if (ret < 0) {<br>
> LOG(IPU3, Error) << "Failed to import ImgU output buffers";<br>
> goto error;<br>
> }<br>
> <br>
> - ret = viewfinder_->importBuffers(bufferCount);<br>
> + ret = viewfinder_->importBuffers(IPU3_BUFFER_SLOT_COUNT);<br>
> if (ret < 0) {<br>
> LOG(IPU3, Error) << "Failed to import ImgU viewfinder buffers";<br>
> goto error;<br>
> diff --git a/src/libcamera/pipeline/ipu3/imgu.h b/src/libcamera/pipeline/ipu3/imgu.h<br>
> index 9d4915116087..f934a951fc75 100644<br>
> --- a/src/libcamera/pipeline/ipu3/imgu.h<br>
> +++ b/src/libcamera/pipeline/ipu3/imgu.h<br>
> @@ -61,7 +61,7 @@ public:<br>
> outputFormat);<br>
> }<br>
> <br>
> - int allocateBuffers(unsigned int bufferCount);<br>
> + int allocateBuffers();<br>
> void freeBuffers();<br>
> <br>
> int start();<br>
> @@ -86,6 +86,9 @@ private:<br>
> static constexpr unsigned int PAD_VF = 3;<br>
> static constexpr unsigned int PAD_STAT = 4;<br>
> <br>
> + static constexpr unsigned int IPU3_INTERNAL_BUFFER_COUNT = 4;<br>
> + static constexpr unsigned int IPU3_BUFFER_SLOT_COUNT = 5;<br>
<br>
5 buffer slots is low. It means that if applications cycle more than 5<br>
buffers, the V4L2VideoDevice cache that maintains associations between<br>
dmabufs and buffer slots will the trashed. Due to the internal queue of<br>
requests in the IPU3 pipeline handler (similar to what you have<br>
implemented in "[PATCH 0/3] libcamera: pipeline: Add internal request<br>
queue" for other pipeline handlers), we won't fail at queuing requests,<br>
but performance will suffer. I thus think we need to increase the number<br>
of slots to what applications can be reasonably expected to use. We<br>
could use 8, or even 16, as buffer slots are cheap. The same holds for<br>
other pipeline handlers.<br>
<br>
The number of slots for the CIO2 output should match the number of<br>
buffer slots for the ImgU input, as the same buffers are used on the two<br>
video devices. One option is to use IPU3_BUFFER_SLOT_COUNT for the CIO2,<br>
instead of CIO2_BUFFER_COUNT. However, the number of internal CIO2<br>
buffers that are allocated by exportBuffers() in CIO2Device::start(), to<br>
be used in case the application doesn't provide any RAW buffer, should<br>
be lower, as those are real buffer and are thus expensive. The number of<br>
buffers and buffer slots on the CIO2 thus needs to be decoupled.<br>
<br>
For proper operation, the CIO2 will require at least two queued buffers<br>
(one being DMA'ed to, and one waiting). We need at least one extra<br>
buffer queued to the ImgU to keep buffers flowing. Depending on<br>
processing timings, it may be that the ImgU will complete processing of<br>
its buffer before the CIO2 captures the next one, leading to a temporary<br>
situation where the CIO2 will have three buffers queued, or the CIO2<br>
will finish the capture first, leading to a temporary situation where<br>
the CIO2 will have one buffer queued and the ImgU will have two buffers<br>
queued. In either case, shortly afterwards, the other component will<br>
complete capture or processing, and we'll get back to a situation with<br>
two buffers queued in the CIO2 and one in the ImgU. That's thus a<br>
minimum of three buffers for raw images.<br>
<br>
>From an ImgU point of view, we could probably get away with a single<br>
parameter and a single stats buffer. This would however not allow<br>
queuing the next frame for processing in the ImgU before the current<br>
frame completes, so two buffers would be better. Now, if we take the IPA<br>
into account, the statistics buffer will spend some time on the IPA side<br>
for processing. It would thus be best to have an extra statistics buffer<br>
to accommodate that, thus requiring three statistics buffers (and three<br>
parameters buffers, as we associate them together).<br>
<br>
This rationale leads to using the same number of internal buffers for<br>
the CIO2, the parameters and the statistics. We currently use four, and<br>
while the logic above indicates we could get away with three, it would<br>
be safer to keep using four in this patch, and possibly reduce the<br>
number of buffers later.<br>
<br>
I know documentation isn't fun, but I think this rationale should be<br>
captured in a comment in the IPU3 pipeline handler, along with a \todo<br>
item to try and lower the number of internal buffers to three.<br>
<br>
> +<br>
> int linkSetup(const std::string &source, unsigned int sourcePad,<br>
> const std::string &sink, unsigned int sinkPad,<br>
> bool enable);<br>
> diff --git a/src/libcamera/pipeline/ipu3/ipu3.cpp b/src/libcamera/pipeline/ipu3/ipu3.cpp<br>
> index 5fd1757bfe13..4efd201c05e5 100644<br>
> --- a/src/libcamera/pipeline/ipu3/ipu3.cpp<br>
> +++ b/src/libcamera/pipeline/ipu3/ipu3.cpp<br>
> @@ -681,16 +681,9 @@ int PipelineHandlerIPU3::allocateBuffers(Camera *camera)<br>
> {<br>
> IPU3CameraData *data = cameraData(camera);<br>
> ImgUDevice *imgu = data->imgu_;<br>
> - unsigned int bufferCount;<br>
> int ret;<br>
> <br>
> - bufferCount = std::max({<br>
> - data->outStream_.configuration().bufferCount,<br>
> - data->vfStream_.configuration().bufferCount,<br>
> - data->rawStream_.configuration().bufferCount,<br>
> - });<br>
> -<br>
> - ret = imgu->allocateBuffers(bufferCount);<br>
> + ret = imgu->allocateBuffers();<br>
> if (ret < 0)<br>
> return ret;<br>
> <br>
> diff --git a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp<br>
> index d1cd3d9dc082..776e0f92aed1 100644<br>
> --- a/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp<br>
> +++ b/src/libcamera/pipeline/raspberrypi/raspberrypi.cpp<br>
> @@ -1149,20 +1149,15 @@ int PipelineHandlerRPi::prepareBuffers(Camera *camera)<br>
> {<br>
> RPiCameraData *data = cameraData(camera);<br>
> int ret;<br>
> + constexpr unsigned int bufferCount = 4;<br>
> <br>
> /*<br>
> - * Decide how many internal buffers to allocate. For now, simply look<br>
> - * at how many external buffers will be provided. We'll need to improve<br>
> - * this logic. However, we really must have all streams allocate the same<br>
> - * number of buffers to simplify error handling in queueRequestDevice().<br>
> + * Allocate internal buffers. We really must have all streams allocate<br>
> + * the same number of buffers to simplify error handling in<br>
> + * queueRequestDevice().<br>
> */<br>
> - unsigned int maxBuffers = 0;<br>
> - for (const Stream *s : camera->streams())<br>
> - if (static_cast<const RPi::Stream *>(s)->isExternal())<br>
> - maxBuffers = std::max(maxBuffers, s->configuration().bufferCount);<br>
> -<br>
> for (auto const stream : data->streams_) {<br>
> - ret = stream->prepareBuffers(maxBuffers);<br>
> + ret = stream->prepareBuffers(bufferCount);<br>
<br>
We have a similar problem here, 4 buffer slots is too little, but when<br>
the stream has to allocate internal buffers (!importOnly), which is the<br>
case for most streams, we don't want to overallocate.<br>
<br>
I'd like to get feedback from Naush here, but I think this means we'll<br>
have to relax the requirement documented in the comment above, and<br>
accept a different number of buffers for each stream.<br></blockquote><div><br></div><div>Sorry for the late reply to this thread!</div><div><br></div><div>As is evident from the above comment, this bit of code does need to be improved</div><div>to avoid over-applications which I will get to at some point. However, to address this</div><div>change and the comments, 4 buffer slots sounds like it might be too little. Regarding</div><div>the requirement on having streams allocate the same number of buffers - that can be</div><div>relaxed (and the comment removed) as we do handle it correctly.</div><div><br></div><div>Regards,</div><div>Naush</div><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
> if (ret < 0)<br>
> return ret;<br>
> }<br>
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1.cpp b/src/libcamera/pipeline/rkisp1/rkisp1.cpp<br>
> index 11325875b929..f4ea2fd4d4d0 100644<br>
> --- a/src/libcamera/pipeline/rkisp1/rkisp1.cpp<br>
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1.cpp<br>
> @@ -690,16 +690,11 @@ int PipelineHandlerRkISP1::allocateBuffers(Camera *camera)<br>
> unsigned int ipaBufferId = 1;<br>
> int ret;<br>
> <br>
> - unsigned int maxCount = std::max({<br>
> - data->mainPathStream_.configuration().bufferCount,<br>
> - data->selfPathStream_.configuration().bufferCount,<br>
> - });<br>
> -<br>
> - ret = param_->allocateBuffers(maxCount, ¶mBuffers_);<br>
> + ret = param_->allocateBuffers(RKISP1_INTERNAL_BUFFER_COUNT, ¶mBuffers_);<br>
> if (ret < 0)<br>
> goto error;<br>
> <br>
> - ret = stat_->allocateBuffers(maxCount, &statBuffers_);<br>
> + ret = stat_->allocateBuffers(RKISP1_INTERNAL_BUFFER_COUNT, &statBuffers_);<br>
> if (ret < 0)<br>
> goto error;<br>
> <br>
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.cpp b/src/libcamera/pipeline/rkisp1/rkisp1_path.cpp<br>
> index 25f482eb8d8e..fea330f72886 100644<br>
> --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.cpp<br>
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.cpp<br>
> @@ -172,7 +172,7 @@ int RkISP1Path::start()<br>
> return -EBUSY;<br>
> <br>
> /* \todo Make buffer count user configurable. */<br>
> - ret = video_->importBuffers(RKISP1_BUFFER_COUNT);<br>
> + ret = video_->importBuffers(RKISP1_BUFFER_SLOT_COUNT);<br>
> if (ret)<br>
> return ret;<br>
> <br>
> diff --git a/src/libcamera/pipeline/rkisp1/rkisp1_path.h b/src/libcamera/pipeline/rkisp1/rkisp1_path.h<br>
> index 91757600ccdc..3c5891009c58 100644<br>
> --- a/src/libcamera/pipeline/rkisp1/rkisp1_path.h<br>
> +++ b/src/libcamera/pipeline/rkisp1/rkisp1_path.h<br>
> @@ -27,6 +27,9 @@ class V4L2Subdevice;<br>
> struct StreamConfiguration;<br>
> struct V4L2SubdeviceFormat;<br>
> <br>
> +static constexpr unsigned int RKISP1_INTERNAL_BUFFER_COUNT = 4;<br>
> +static constexpr unsigned int RKISP1_BUFFER_SLOT_COUNT = 5;<br>
<br>
The situation should be simpler for the rkisp1, as it has a different<br>
pipeline model (inline ISP as opposed to offline ISP for the IPU3). We<br>
can allocate more slots (8 or 16, as for other pipeline handlers), and<br>
restrict the number of internal buffers (for stats and parameters) to<br>
the number of requests we expect to queue to the device at once, plus<br>
one for the IPA. Four thus seems good. Capturing this rationale in a<br>
comment would be good too.<br>
<br>
BTW, I may be too tired to think properly, or just unable to see the<br>
obvious, so please challenge any rationale you think is incorrect.<br>
<br>
> +<br>
> class RkISP1Path<br>
> {<br>
> public:<br>
> diff --git a/src/libcamera/pipeline/simple/converter.cpp b/src/libcamera/pipeline/simple/converter.cpp<br>
> index b5e34c4cd0c5..b3bcf01483f7 100644<br>
> --- a/src/libcamera/pipeline/simple/converter.cpp<br>
> +++ b/src/libcamera/pipeline/simple/converter.cpp<br>
> @@ -103,11 +103,11 @@ int SimpleConverter::Stream::exportBuffers(unsigned int count,<br>
> <br>
> int SimpleConverter::Stream::start()<br>
> {<br>
> - int ret = m2m_->output()->importBuffers(inputBufferCount_);<br>
> + int ret = m2m_->output()->importBuffers(SIMPLE_BUFFER_SLOT_COUNT);<br>
<br>
Shouldn't this be SIMPLE_INTERNAL_BUFFER_COUNT ? Overallocating is not<br>
much of an issue I suppose.<br>
<br>
> if (ret < 0)<br>
> return ret;<br>
> <br>
> - ret = m2m_->capture()->importBuffers(outputBufferCount_);<br>
> + ret = m2m_->capture()->importBuffers(SIMPLE_BUFFER_SLOT_COUNT);<br>
> if (ret < 0) {<br>
> stop();<br>
> return ret;<br>
> diff --git a/src/libcamera/pipeline/simple/converter.h b/src/libcamera/pipeline/simple/converter.h<br>
> index 276a2a291c21..7e1d60674f62 100644<br>
> --- a/src/libcamera/pipeline/simple/converter.h<br>
> +++ b/src/libcamera/pipeline/simple/converter.h<br>
> @@ -29,6 +29,9 @@ class SizeRange;<br>
> struct StreamConfiguration;<br>
> class V4L2M2MDevice;<br>
> <br>
> +constexpr unsigned int SIMPLE_INTERNAL_BUFFER_COUNT = 5;<br>
> +constexpr unsigned int SIMPLE_BUFFER_SLOT_COUNT = 5;<br>
<br>
Let's name the variables kSimpleInternalBufferCount and<br>
kSimpleBufferSlotCount, as that's the naming scheme we're moving to for<br>
non-macro constants. Same comment elsewhere in this patch.<br>
<br>
Those constants don't belong to converter.h. Could you turn them into<br>
member constants of the SimplePipelineHandler class, as<br>
kNumInternalBuffers (which btw should be removed) ? The number of buffer<br>
slots can be passed as a parameter to SimpleConverter::start().<br>
<br>
There's no stats or parameters here, and no IPA, so the situation is<br>
different than for IPU3 and RkISP1. The number of internal buffers<br>
should just be one more than the minimum number of buffers required by<br>
the capture device, I don't think there's another requirement.<br>
<br>
> +<br>
> class SimpleConverter<br>
> {<br>
> public:<br>
> diff --git a/src/libcamera/pipeline/simple/simple.cpp b/src/libcamera/pipeline/simple/simple.cpp<br>
> index 1c25a7344f5f..a1163eaf8be2 100644<br>
> --- a/src/libcamera/pipeline/simple/simple.cpp<br>
> +++ b/src/libcamera/pipeline/simple/simple.cpp<br>
> @@ -803,12 +803,10 @@ int SimplePipelineHandler::start(Camera *camera, [[maybe_unused]] const ControlL<br>
> * When using the converter allocate a fixed number of internal<br>
> * buffers.<br>
> */<br>
> - ret = video->allocateBuffers(kNumInternalBuffers,<br>
> + ret = video->allocateBuffers(SIMPLE_INTERNAL_BUFFER_COUNT,<br>
> &data->converterBuffers_);<br>
> } else {<br>
> - /* Otherwise, prepare for using buffers from the only stream. */<br>
> - Stream *stream = &data->streams_[0];<br>
> - ret = video->importBuffers(stream->configuration().bufferCount);<br>
> + ret = video->importBuffers(SIMPLE_BUFFER_SLOT_COUNT);<br>
> }<br>
> if (ret < 0)<br>
> return ret;<br>
> diff --git a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp<br>
> index fd39b3d3c72c..755949e7a59a 100644<br>
> --- a/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp<br>
> +++ b/src/libcamera/pipeline/uvcvideo/uvcvideo.cpp<br>
> @@ -91,6 +91,8 @@ private:<br>
> return static_cast<UVCCameraData *>(<br>
> PipelineHandler::cameraData(camera));<br>
> }<br>
> +<br>
> + static constexpr unsigned int UVC_BUFFER_SLOT_COUNT = 5;<br>
> };<br>
> <br>
> UVCCameraConfiguration::UVCCameraConfiguration(UVCCameraData *data)<br>
> @@ -236,9 +238,8 @@ int PipelineHandlerUVC::exportFrameBuffers(Camera *camera,<br>
> int PipelineHandlerUVC::start(Camera *camera, [[maybe_unused]] const ControlList *controls)<br>
> {<br>
> UVCCameraData *data = cameraData(camera);<br>
> - unsigned int count = data->stream_.configuration().bufferCount;<br>
> <br>
> - int ret = data->video_->importBuffers(count);<br>
> + int ret = data->video_->importBuffers(UVC_BUFFER_SLOT_COUNT);<br>
<br>
For the uvc and vimc pipeline handlers, we have no internal buffers, so<br>
it's quite easy. We should have 8 or 16 slots, as for other pipeline<br>
handlers.<br>
<br>
> if (ret < 0)<br>
> return ret;<br>
> <br>
> diff --git a/src/libcamera/pipeline/vimc/vimc.cpp b/src/libcamera/pipeline/vimc/vimc.cpp<br>
> index e89d53182c6d..24ba743a946c 100644<br>
> --- a/src/libcamera/pipeline/vimc/vimc.cpp<br>
> +++ b/src/libcamera/pipeline/vimc/vimc.cpp<br>
> @@ -102,6 +102,8 @@ private:<br>
> return static_cast<VimcCameraData *>(<br>
> PipelineHandler::cameraData(camera));<br>
> }<br>
> +<br>
> + static constexpr unsigned int VIMC_BUFFER_SLOT_COUNT = 5;<br>
> };<br>
> <br>
> namespace {<br>
> @@ -312,9 +314,8 @@ int PipelineHandlerVimc::exportFrameBuffers(Camera *camera,<br>
> int PipelineHandlerVimc::start(Camera *camera, [[maybe_unused]] const ControlList *controls)<br>
> {<br>
> VimcCameraData *data = cameraData(camera);<br>
> - unsigned int count = data->stream_.configuration().bufferCount;<br>
> <br>
> - int ret = data->video_->importBuffers(count);<br>
> + int ret = data->video_->importBuffers(VIMC_BUFFER_SLOT_COUNT);<br>
> if (ret < 0)<br>
> return ret;<br>
> <br>
<br>
-- <br>
Regards,<br>
<br>
Laurent Pinchart<br>
</blockquote></div></div>