@article {aGene-Molad, title = {AmodalAppleSize_RGB-D dataset: RGB-D images of apple trees annotated with modal and amodal segmentation masks for fruit detection, visibility and size estimation}, journal = {Data in Brief}, volume = {52}, year = {2024}, month = {02/2024}, abstract = {

The present dataset comprises a collection of RGB-D apple tree images that can be used to train and test computer vision-based fruit detection and sizing methods. It encompasses two distinct sub-sets obtained from a Fuji and an Elstar apple orchard. The Fuji sub-set consists of 3925 RGB-D images containing a total of 15,335 apples annotated with both modal and amodal apple segmentation masks. Modal masks denote the visible portions of the apples, whereas amodal masks encompass both visible and occluded apple regions. Notably, this dataset is the first public resource to incorporate on-tree fruit amodal masks. This inclusion addresses a critical gap in existing datasets, enabling the development of robust automatic fruit sizing methods and accurate fruit visibility estimation, particularly in the presence of partial occlusions. Besides the fruit segmentation masks, the dataset also includes the fruit size (calliper) ground truth for each annotated apple. The second sub-set comprises 2731 RGB-D images capturing five Elstar apple trees at four distinct growth stages. This sub-set includes the mean diameter of each tree at every growth stage and serves as a valuable resource for evaluating fruit sizing methods trained with the first sub-set. The present data were employed in the research papers titled {\textquotedblleft}Looking behind occlusions: a study on amodal segmentation for robust on-tree apple fruit size estimation{\textquotedblright} [1] and {\textquotedblleft}Simultaneous fruit detection and size estimation using multitask deep neural networks{\textquotedblright} [2].
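As an illustration of how the paired masks can be used, fruit visibility follows directly from the ratio of modal to amodal mask area. The sketch below is a minimal example assuming boolean NumPy masks of equal shape; the function name and toy data are illustrative, not part of the dataset tooling.

```python
import numpy as np

def visibility_percentage(modal_mask, amodal_mask):
    """Percent of the fruit that is visible: modal area over amodal area."""
    amodal_area = np.count_nonzero(amodal_mask)
    if amodal_area == 0:
        return 0.0
    # Modal pixels should be a subset of amodal pixels; intersect defensively.
    visible_area = np.count_nonzero(modal_mask & amodal_mask)
    return 100.0 * visible_area / amodal_area

# Toy example: the visible region covers 60% of the complete (amodal) outline.
amodal = np.zeros((100, 100), dtype=bool)
amodal[20:80, 20:80] = True
modal = amodal.copy()
modal[20:44, :] = False                      # occluded upper band
print(visibility_percentage(modal, amodal))  # -> 60.0
```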

}, keywords = {Agricultural robotics, Amodal segmentation, Depth image, Fruit measurement, Fruit visibility, Instance segmentation, Modal segmentation, Yield prediction}, doi = {10.1016/j.dib.2023.110000}, author = {Gen{\'e}-Mola, Jordi and Ferrer-Ferrer, M. and Hemming, J. and Dalfsen, P. and Hoog, D. and Sanz-Cortiella, R. and Rosell-Polo, Joan R. and Morros, J.R. and Vilaplana, Ver{\'o}nica and Ruiz-Hidalgo, J. and Gregorio, Eduard} } @article {aGene-Mola23, title = {Looking behind occlusions: A study on amodal segmentation for robust on-tree apple fruit size estimation}, journal = {Computers and Electronics in Agriculture}, volume = {209}, year = {2023}, month = {04/2023}, abstract = {

The detection and sizing of fruits with computer vision methods is of interest because it provides relevant information to improve the management of orchard farming. However, the presence of partially occluded fruits limits the performance of existing methods, making reliable fruit sizing a challenging task. While previous fruit segmentation works limit segmentation to the visible region of fruits (known as modal segmentation), in this work we propose an amodal segmentation algorithm to predict the complete shape of each fruit, including its visible and occluded regions. To do so, an end-to-end convolutional neural network (CNN) for simultaneous modal and amodal instance segmentation was implemented. The predicted amodal masks were used to estimate the fruit diameters in pixels. Modal masks were used to identify the visible region and to measure the distance between the apples and the camera using the depth image. Finally, the fruit diameters in millimetres (mm) were computed by applying the pinhole camera model. The method was developed with a Fuji apple dataset consisting of 3925 RGB-D images acquired at different growth stages with a total of 15,335 annotated apples, and was subsequently tested in a case study to measure the diameter of Elstar apples at different growth stages. Fruit detection results showed an F1-score of 0.86, and the fruit diameter results reported a mean absolute error (MAE) of 4.5\ mm and R2\ =\ 0.80 irrespective of fruit visibility. Besides the diameter estimation, modal and amodal masks were used to automatically determine the visibility percentage of measured apples. This feature was used as a confidence value, improving the diameter estimation to MAE\ =\ 2.93\ mm and R2\ =\ 0.91 when limiting the size estimation to fruits detected with a visibility higher than 60\%. The main advantages of the present methodology are its robustness for measuring partially occluded fruits and its capability to determine the visibility percentage. The main limitation is that the depth images were generated by means of photogrammetry methods, which limits the efficiency of data acquisition. To overcome this limitation, future works should consider the use of commercial RGB-D sensors. The code and the dataset used to evaluate the method have been made publicly available at\ https://github.com/GRAP-UdL-AT/Amodal_Fruit_Sizing.
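The sizing step above reduces to the pinhole camera model: an object spanning d pixels, seen at depth Z, has real size d·Z/f, where f is the focal length in pixels. Below is a minimal sketch under that model, assuming the amodal diameter in pixels and a modal-region depth are already extracted; names are illustrative rather than taken from the released code.

```python
import numpy as np

def fruit_diameter_mm(diameter_px, depth_mm, focal_px):
    """Pinhole camera model: real size = pixel size * depth / focal length."""
    return diameter_px * depth_mm / focal_px

def modal_region_depth_mm(depth_image, modal_mask):
    """Median depth over the visible (modal) region, ignoring missing pixels."""
    vals = depth_image[modal_mask]
    return float(np.median(vals[vals > 0]))

# Example: a 120 px amodal diameter seen at 1.5 m with a 2400 px focal length.
print(fruit_diameter_mm(120, 1500.0, 2400.0))  # -> 75.0 mm
```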

}, keywords = {Deep learning, Fruit detection, Fruit measurement, Fruit visibility, Precision agriculture, Yield estimation}, issn = {0168-1699}, doi = {10.1016/j.compag.2023.107854}, url = {https://authors.elsevier.com/sd/article/S0168-1699(23)00242-9}, author = {Gen{\'e}-Mola, Jordi and Ferrer-Ferrer, M. and Gregorio, Eduard and Blok, P. M. and Hemming, J. and Morros, J.R. and Rosell-Polo, Joan R. and Vilaplana, Ver{\'o}nica and Ruiz-Hidalgo, J.} } @article {aFerrer-Ferrer, title = {Simultaneous Fruit Detection and Size Estimation Using Multitask Deep Neural Networks}, journal = {Biosystems Engineering}, volume = {233}, year = {2023}, month = {09/2023}, pages = {63-75}, abstract = {

The measurement of fruit size is of great interest for estimating yield and predicting harvest resources in advance. This work proposes a novel technique for in-field apple detection and measurement based on deep neural networks. The proposed framework was trained with RGB-D data and consists of an end-to-end multitask deep neural network architecture specifically designed to perform two tasks: 1) detection and segmentation of each fruit from its surroundings; 2) estimation of the diameter of each detected fruit. The methodology was tested with a total of 15,335 annotated apples at different growth stages, with diameters varying from 27 mm to 95 mm. Fruit detection reported an F1-score of 0.88 and diameter estimation a mean absolute error of 5.64 mm. These are state-of-the-art results, with the additional advantages of: a) using an end-to-end multitask trainable network; b) an efficient and fast inference speed; and c) being based on RGB-D data, which can be acquired with affordable depth cameras. By contrast, the main disadvantage is the need to annotate a large amount of data with fruit masks and diameter ground truth to train the model. Finally, a fruit visibility analysis showed an improvement in the prediction when limiting the measurement to apples above 65\% visibility (mean absolute error of 5.09 mm). This suggests that future work should develop a method for automatically identifying the most visible apples and discarding the prediction of highly occluded fruits.
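To make the multitask idea concrete, the toy PyTorch sketch below shows the general pattern described above: a shared encoder feeding a segmentation head and a diameter-regression head, trained end-to-end with a summed loss. It is a deliberately simplified stand-in, not the architecture from the paper; all layer sizes and targets are arbitrary, and a real detector would predict per-instance outputs.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultitaskFruitNet(nn.Module):
    """Toy multitask net: shared encoder, segmentation + diameter heads."""
    def __init__(self, in_channels=4):           # e.g. RGB + depth
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
        )
        self.seg_head = nn.Conv2d(64, 1, 1)       # per-pixel fruit logit
        self.diam_head = nn.Sequential(           # one diameter (mm) per image
            nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(64, 1),
        )

    def forward(self, x):
        feats = self.encoder(x)
        return self.seg_head(feats), self.diam_head(feats)

model = MultitaskFruitNet()
rgbd = torch.randn(2, 4, 128, 128)                # a toy batch
seg_logits, diam = model(rgbd)
seg_target = (torch.rand_like(seg_logits) > 0.5).float()
diam_target = torch.tensor([[65.0], [72.0]])
# End-to-end training signal: sum of the two task losses.
loss = F.binary_cross_entropy_with_logits(seg_logits, seg_target) \
     + F.l1_loss(diam, diam_target)
loss.backward()
```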

}, keywords = {Deep learning, Fruit measurement, Fruit visibility, Precision agriculture, Yield estimation}, doi = {10.1016/j.biosystemseng.2023.07.010}, author = {Ferrer-Ferrer, M. and Ruiz-Hidalgo, J. and Gregorio, Eduard and Vilaplana, Ver{\'o}nica and Morros, J.R. and Gen{\'e}-Mola, Jordi} } @article {aGene-Molac, title = {Fruit detection and 3D location using instance segmentation neural networks and structure-from-motion photogrammetry}, journal = {Computers and Electronics in Agriculture}, volume = {169}, year = {2020}, month = {02/2020}, abstract = {

The development of remote fruit detection systems able to identify and 3D-locate fruits provides opportunities to improve the efficiency of agriculture management. Most current fruit detection systems are based on 2D image analysis. Although the use of 3D sensors is emerging, precise 3D fruit location is still a pending issue. This work presents a new methodology for fruit detection and 3D location consisting of: (1) 2D fruit detection and segmentation using the Mask R-CNN instance segmentation neural network; (2) 3D point cloud generation of detected apples using structure-from-motion (SfM) photogrammetry; (3) projection of 2D image detections onto 3D space; (4) false-positive removal using a trained support vector machine. This methodology was tested on 11 Fuji apple trees containing a total of 1455 apples. Results showed that, by combining instance segmentation with SfM, the system performance increased from an F1-score of 0.816 (2D fruit detection) to 0.881 (3D fruit detection and location) with respect to the total amount of fruits. The main advantages of this methodology are the reduced number of false positives and the higher detection rate, while the main disadvantage is the high processing time required for SfM, which makes it presently unsuitable for real-time work. From these results, it can be concluded that the combination of instance segmentation and SfM provides high-performance fruit detection with high 3D data precision. The dataset has been made publicly available and an interactive visualization of fruit detection results is accessible at http://www.grap.udl.cat/documents/photogrammetry_fruit_detection.html
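Step (3), projecting 2D detections onto 3D space, can be read in reverse: project every SfM point into the image with the camera model and keep those landing inside a detected mask. A minimal sketch assuming known intrinsics K and a world-to-camera pose (R, t); all names are illustrative, not the paper's code.

```python
import numpy as np

def points_in_detection(points_w, K, R, t, det_mask):
    """Keep the 3D points whose image projection falls on a detection mask.

    points_w: (N, 3) SfM point coordinates in the world frame.
    K: (3, 3) camera intrinsics; R, t: world-to-camera rotation/translation.
    det_mask: (H, W) boolean mask of one detected fruit.
    """
    pts_cam = points_w @ R.T + t                 # world -> camera frame
    in_front = pts_cam[:, 2] > 0                 # discard points behind camera
    uvw = pts_cam @ K.T                          # pinhole projection
    uv = uvw[:, :2] / uvw[:, 2:3]
    u = np.round(uv[:, 0]).astype(int)
    v = np.round(uv[:, 1]).astype(int)
    h, w = det_mask.shape
    inside = in_front & (u >= 0) & (u < w) & (v >= 0) & (v < h)
    hit = np.zeros(len(points_w), dtype=bool)
    hit[inside] = det_mask[v[inside], u[inside]]
    return points_w[hit]
```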

}, keywords = {Fruit detection, Fruit location, Mask R-CNN, Structure-from-motion, Terrestrial remote sensing}, issn = {0168-1699}, doi = {10.1016/j.compag.2019.105165}, author = {Gen{\'e}-Mola, Jordi and Sanz, Ricardo and Rosell-Polo, Joan R. and Morros, J.R. and Ruiz-Hidalgo, J. and Vilaplana, Ver{\'o}nica and Gregorio, Eduard} } @article {aGene-Mola20, title = {Fuji-SfM dataset: A collection of annotated images and point clouds for Fuji apple detection and location using structure-from-motion photogrammetry}, journal = {Data in Brief}, year = {2020}, month = {06/2020}, keywords = {Fruit detection, Mask R-CNN, Photogrammetry, Structure-from-motion, Terrestrial remote sensing, Yield mapping, Yield prediction}, doi = {10.1016/j.dib.2020.105591}, author = {Gen{\'e}-Mola, Jordi and Sanz, Ricardo and Rosell-Polo, Joan R. and Morros, J.R. and Ruiz-Hidalgo, J. and Vilaplana, Ver{\'o}nica and Gregorio, Eduard} } @article {aGene-Molab, title = {Fruit Detection in an Apple Orchard Using a Mobile Terrestrial Laser Scanner}, journal = {Biosystems Engineering}, volume = {187}, year = {2019}, month = {09/2019}, pages = {171}, abstract = {

The development of reliable fruit detection and localisation systems provides an opportunity to improve crop value and management by limiting fruit spoilage and optimising harvesting practices. Most proposed systems for fruit detection are based on RGB cameras and thus are affected by intrinsic constraints, such as variable lighting conditions. This work presents a new technique that uses a mobile terrestrial laser scanner (MTLS) to detect and localise Fuji apples. An experimental test focused on Fuji apple trees (Malus domestica Borkh. cv. Fuji) was carried out. A 3D point cloud of the scene was generated using an MTLS composed of a Velodyne VLP-16 LiDAR sensor synchronised with an RTK-GNSS satellite navigation receiver. A reflectance analysis of tree elements was performed, obtaining mean apparent reflectance values of 28.9\%, 29.1\%, and 44.3\% for leaves, branches and trunks, and apples, respectively. These results suggest that the apparent reflectance parameter (at 905 nm wavelength) can be useful for detecting apples. For that purpose, a four-step fruit detection algorithm was developed. By applying this algorithm, a localisation success of 87.5\%, an identification success of 82.4\%, and an F1-score of 0.858 were obtained in relation to the total amount of fruits. These detection rates are similar to those obtained by RGB-based systems, but with the additional advantage of providing direct 3D fruit location information, which is not affected by sunlight variations. From the experimental results, it can be concluded that LiDAR-based technology and, particularly, its reflectance information, has potential for remote apple detection and 3D location.
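The companion conference paper (see the AgEng 2018 entry below) spells out the core steps of this kind of algorithm: reflectance thresholding (apples reflect about 44\% at 905 nm versus roughly 29\% for leaves and wood), statistical outlier removal, and density-based clustering. The sketch below implements that sequence with NumPy and scikit-learn; the threshold and clustering parameters are assumptions for illustration, not the paper's values.

```python
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

def detect_fruit_clusters(points, reflectance,
                          refl_thresh=0.37, eps=0.04, min_samples=15):
    """Threshold reflectance, remove statistical outliers, cluster the rest.

    points: (N, 3) coordinates in metres; reflectance: (N,) values in [0, 1].
    Returns the retained points and a DBSCAN label per point (-1 = noise).
    """
    # 1) Reflectance thresholding: keep the brighter (fruit-like) returns.
    pts = points[reflectance > refl_thresh]
    # 2) Statistical outlier removal: drop points whose mean distance to
    #    their neighbours is far above the global average.
    dists, _ = NearestNeighbors(n_neighbors=8).fit(pts).kneighbors(pts)
    mean_d = dists[:, 1:].mean(axis=1)           # skip the zero self-distance
    pts = pts[mean_d < mean_d.mean() + 2.0 * mean_d.std()]
    # 3) Density-based clustering: each cluster is a fruit candidate.
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(pts)
    return pts, labels
```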

}, issn = {1537-5110}, doi = {10.1016/j.biosystemseng.2019.08.017}, url = {https://authors.elsevier.com/c/1Zmc45Tbkk9EHW}, author = {Gen{\'e}-Mola, Jordi and Gregorio, Eduard and Guevara, Javier and Auat Cheein, Fernando and Sanz, Ricardo and Escol{\`a}, Alexandre and Llorens Calveras, Jordi and Morros, J.R. and Ruiz-Hidalgo, J. and Vilaplana, Ver{\'o}nica and Rosell-Polo, Joan R.} } @article {aGene-Molaa, title = {KFuji RGB-DS database: Fuji apple multi-modal images for fruit detection with color, depth and range-corrected IR data}, journal = {Data in Brief}, year = {2019}, month = {07/2019}, abstract = {

This article contains data related to the research article entitled {\textquotedblleft}Multi-modal Deep Learning for Fuji Apple Detection Using RGB-D Cameras and their Radiometric Capabilities{\textquotedblright} [1]. The development of reliable fruit detection and localization systems is essential for future sustainable agronomic management of high-value crops. RGB-D sensors have shown potential for fruit detection and localization since they provide 3D information along with color data. However, the lack of substantial datasets is a barrier to exploiting these sensors. This article presents the KFuji RGB-DS database, which is composed of 967 multi-modal images of Fuji apples on trees captured using a Microsoft Kinect v2 (Microsoft, Redmond, WA, USA). Each image contains information from 3 different modalities: color (RGB), depth (D) and range-corrected IR intensity (S). Ground truth fruit locations were manually annotated, labeling a total of 12,839 apples across the whole dataset. The dataset is publicly available at http://www.grap.udl.cat/publicacions/datasets.html.

}, keywords = {Depth cameras, RGB-D, Fruit detection, Fruit reflectance, Fuji apple, Multi-modal dataset}, doi = {10.1016/j.dib.2019.104289}, author = {Gen{\'e}-Mola, Jordi and Vilaplana, Ver{\'o}nica and Rosell-Polo, Joan R. and Morros, J.R. and Ruiz-Hidalgo, J. and Gregorio, Eduard} } @article {aGene-Mola, title = {Multi-modal Deep Learning for Fuji Apple Detection Using RGB-D Cameras and their Radiometric Capabilities}, journal = {Computers and Electronics in Agriculture}, volume = {162}, year = {2019}, month = {07/2019}, pages = {689-698}, abstract = {

Fruit detection and localization will be essential for future agronomic management of fruit crops, with applications in yield prediction, yield mapping and automated harvesting. RGB-D cameras are promising sensors for fruit detection given that they provide geometrical information with color data. Some of these sensors work on the principle of time-of-flight (ToF) and, besides color and depth, provide the backscatter signal intensity. However, this radiometric capability has not been exploited for fruit detection applications. This work presents the KFuji RGB-DS database, composed of 967 multi-modal images containing a total of 12,839 Fuji apples. Compilation of the database allowed a study of the usefulness of fusing RGB-D and radiometric information obtained with Kinect v2 for fruit detection. To do so, the signal intensity was range corrected to overcome signal attenuation, obtaining an image that was proportional to the reflectance of the scene. A registration between RGB, depth and intensity images was then carried out. The Faster R-CNN model was adapted for use with five-channel input images: color (RGB), depth (D) and range-corrected intensity signal (S). Results show an improvement of 4.46\% in F1-score when adding depth and range-corrected intensity channels, obtaining an F1-score of 0.898 and an AP of 94.8\% when all channels are used. From our experimental results, it can be concluded that the radiometric capabilities of ToF sensors give valuable information for fruit detection.
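The range correction can be illustrated with an inverse-square attenuation model: if the return intensity falls off as 1/d^2, multiplying by d^2 recovers a quantity proportional to scene reflectance. This is an assumed model for illustration (the paper's exact correction may differ); the sketch also stacks the corrected channel with RGB and depth into the five-channel network input.

```python
import numpy as np

def range_corrected_intensity(intensity, depth_m):
    """Assumed inverse-square model: S ~ I * d^2, proportional to reflectance."""
    s = intensity * np.square(depth_m)
    return s / (s.max() + 1e-9)                  # normalise to [0, 1]

def build_rgbds(rgb, depth_m, intensity):
    """Stack colour (3), depth (1) and corrected intensity (1) -> (H, W, 5)."""
    s = range_corrected_intensity(intensity, depth_m)
    return np.dstack([rgb.astype(np.float32) / 255.0, depth_m, s])
```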

}, keywords = {Agricultural robotics, Convolutional neural networks, Fruit detection, Fruit reflectance, Multi-modal Faster R-CNN, RGB-D}, doi = {10.1016/j.compag.2019.05.016}, author = {Gen{\'e}-Mola, Jordi and Vilaplana, Ver{\'o}nica and Rosell-Polo, Joan R. and Morros, J.R. and Ruiz-Hidalgo, J. and Gregorio, Eduard} } @conference {cGene-Mola19, title = {Uso de redes neuronales convolucionales para la detecci{\'o}n remota de frutos con c{\'a}maras RGB-D}, booktitle = {Congreso Ib{\'e}rico de Agroingenier{\'\i}a}, year = {2019}, month = {09/2019}, publisher = {Universidad de Zaragoza (UZA)}, organization = {Universidad de Zaragoza (UZA)}, address = {Huesca}, abstract = {

Remote fruit detection will be an indispensable tool for the optimized and sustainable agronomic management of future fruit plantations, with applications in harvest forecasting, robotic harvesting and yield mapping. This work proposes the use of RGB-D depth cameras for fruit detection and subsequent 3D localization. The data acquisition equipment consists of a self-propelled terrestrial platform fitted with two Microsoft Kinect v2 sensors and an RTK-GNSS positioning system. With this equipment, three rows of Fuji apple trees in a commercial orchard were scanned. The acquired dataset comprises 110 captures containing a total of 12,838 Fuji apples. Fruit detection was performed on the RGB data (colour images provided by the sensor). To this end, the Faster R-CNN object detection convolutional neural network was implemented and trained; it consists of two modules: a region proposal network and a classification network. Both modules share the first convolutional layers, following the VGG-16 model pre-trained on the ImageNet database. Test results show a detection rate of 91.4\% of the fruits with 15.9\% false positives (F1-score = 0.876). A qualitative evaluation of the detections shows that the false positives correspond to image regions with a pattern very similar to an apple, where even the human eye struggles to determine whether an apple is present. Conversely, the undetected apples correspond to fruits almost entirely occluded by other vegetative organs (leaves or branches) or to apples cut off at the image margins. From the experimental results it is concluded that the Kinect v2 sensor has great potential for fruit detection and 3D localization. The main limitation of the system is that the performance of the depth sensor is degraded under high illumination conditions.
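The reported F1-score is consistent with the stated rates, reading the 91.4\% detection rate as recall and the 15.9\% false-positive rate as one minus precision:

\[
P = 1 - 0.159 = 0.841, \qquad R = 0.914, \qquad
F_1 = \frac{2PR}{P + R} = \frac{2 \times 0.841 \times 0.914}{0.841 + 0.914} \approx 0.876
\]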

}, keywords = {Depth cameras, Fruit detection, Convolutional neural networks, RGB-D, Agricultural robotics}, doi = {10.26754/c_agroing.2019.com.3325}, author = {Gen{\'e}-Mola, Jordi and Vilaplana, Ver{\'o}nica and Rosell-Polo, Joan R. and Morros, J.R. and Ruiz-Hidalgo, J. and Gregorio, Eduard} } @conference {cGene-Mola18, title = {Fruit Detection Using Mobile Terrestrial Laser Scanning}, booktitle = {AgEng 2018}, year = {2018}, month = {07/2018}, address = {Wageningen (Netherlands)}, abstract = {

The development of reliable fruit detection and localization systems is essential for future sustainable agronomic management of high-value crops. To date, most proposed systems for fruit detection and characterization are based on RGB cameras and are thus affected by intrinsic constraints, such as variable lighting conditions and camera calibration. This work presents a new technique that uses a mobile terrestrial laser scanner to detect and localize fruits regardless of the prevailing lighting conditions and without the need for a previous calibration. An experimental test focused on two Fuji apple trees (containing 139 and 145 apples each) was carried out. A 3D point cloud of this scene was generated using a Velodyne VLP-16 LiDAR sensor synchronized with an RTK-GNSS receiver. A reflectivity analysis of tree elements was performed, obtaining mean reflectivity values of 28.9\%, 29.1\%, and 44.3\% for leaves, trunks, and fruits, respectively. These results suggest that the reflectivity parameter can be useful to localize fruits in the tree. From this knowledge, a three-step fruit detection algorithm was developed: 1) reflectivity thresholding to remove most of the leaves and trunks from the original point cloud; 2) statistical outlier removal to reduce noise; 3) connected-components clustering using a density-based algorithm. By applying this algorithm to our dataset, a localization success of 85\%, a detachment success of 78.8\%, and a false detection rate of 15.2\% were obtained. These detection rates are similar to those obtained by current RGB-based systems, but with the additional advantage of providing direct 3D fruit location information (global coordinates), which is not affected by sunlight variations. It can be concluded that LiDAR technology and, particularly, its reflectivity information, might have potential use in fruit detection. Future work should include the application of this fruit detection technique to a wider range of crop types.

}, author = {Gen{\'e}-Mola, Jordi and Gregorio, Eduard and Guevara, Javier and Auat Cheein, Fernando and Escol{\`a}, Alexandre and Morros, J.R. and Rosell-Polo, Joan R.} }