% Journal article on amodal instance segmentation for on-tree apple diameter estimation.
% Identifiers are stored bare (no "ISSN " label, no DOI resolver prefix); the style adds them.
@article{aGene-Mola23,
  author   = {Gen{\'e}-Mola, Jordi and Ferrer-Ferrer, M. and Gregorio, Eduard and Blok, P. M. and Hemming, J. and Morros, J. R. and Rosell-Polo, Joan R. and Vilaplana, Ver{\'o}nica and Ruiz-Hidalgo, J.},
  title    = {Looking behind occlusions: A study on amodal segmentation for robust on-tree apple fruit size estimation},
  journal  = {Computers and Electronics in Agriculture},
  volume   = {209},
  year     = {2023},
  month    = apr,
  issn     = {0168-1699},
  doi      = {10.1016/j.compag.2023.107854},
  url      = {https://authors.elsevier.com/sd/article/S0168-1699(23)00242-9},
  keywords = {Deep learning, Fruit detection, Fruit measurement, Fruit visibility, Precision agriculture, Yield estimation},
  abstract = {The detection and sizing of fruits with computer vision methods is of interest because
    it provides relevant information to improve the management of orchard farming. However, the
    presence of partially occluded fruits limits the performance of existing methods, making
    reliable fruit sizing a challenging task. While previous fruit segmentation works limit
    segmentation to the visible region of fruits (known as modal segmentation), in this work we
    propose an amodal segmentation algorithm to predict the complete shape, which includes its
    visible and occluded regions. To do so, an end-to-end convolutional neural network (CNN) for
    simultaneous modal and amodal instance segmentation was implemented. The predicted amodal
    masks were used to estimate the fruit diameters in pixels. Modal masks were used to identify
    the visible region and measure the distance between the apples and the camera using the depth
    image. Finally, the fruit diameters in millimetres (mm) were computed by applying the pinhole
    camera model. The method was developed with a Fuji apple dataset consisting of 3925 RGB-D
    images acquired at different growth stages with a total of 15,335 annotated apples, and was
    subsequently tested in a case study to measure the diameter of Elstar apples at different
    growth stages. Fruit detection results showed an F1-score of 0.86 and the fruit diameter
    results reported a mean absolute error (MAE) of 4.5~mm and $R^2 = 0.80$ irrespective of fruit
    visibility. Besides the diameter estimation, modal and amodal masks were used to
    automatically determine the percentage of visibility of measured apples. This feature was
    used as a confidence value, improving the diameter estimation to MAE = 2.93~mm and
    $R^2 = 0.91$ when limiting the size estimation to fruits detected with a visibility higher
    than 60\%. The main advantages of the present methodology are its robustness for measuring
    partially occluded fruits and the capability to determine the visibility percentage. The main
    limitation is that depth images were generated by means of photogrammetry methods, which
    limits the efficiency of data acquisition. To overcome this limitation, future works should
    consider the use of commercial RGB-D sensors. The code and the dataset used to evaluate the
    method have been made publicly available at
    \url{https://github.com/GRAP-UdL-AT/Amodal_Fruit_Sizing}.},
}