@conference {cSalvadorc, title = {Learning Cross-modal Embeddings for Cooking Recipes and Food Images}, booktitle = {CVPR}, year = {2017}, month = {07/2017}, publisher = {CVF / IEEE}, organization = {CVF / IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {

In this paper, we introduce Recipe1M, a new large-scale, structured corpus of over 1m cooking recipes and 800k food images. As the largest publicly available collection of recipe data, Recipe1M affords the ability to train high-capacity models on aligned, multi-modal data. Using these data, we train a neural network to find a joint embedding of recipes and images that yields impressive results on an image-recipe retrieval task. Additionally, we demonstrate that regularization via the addition of a high-level classification objective both improves retrieval performance to rival that of humans and enables semantic vector arithmetic. We postulate that these embeddings will provide a basis for further exploration of the Recipe1M dataset and food and cooking in general.

}, doi = {10.1109/CVPR.2017.327}, url = {http://openaccess.thecvf.com/content_cvpr_2017/html/Salvador_Learning_Cross-Modal_Embeddings_CVPR_2017_paper.html}, author = {Salvador, Amaia and Hynes, Nicholas and Aytar, Yusuf and Marin, Javier and Ofli, Ferda and Weber, Ingmar and Torralba, Antonio} }