@conference {xLidona, title = {Semantic Summarization of Egocentric Photo Stream Events}, booktitle = {ACM Multimedia 2017 Workshop on Lifelogging Tools and Applications}, year = {2017}, month = {10/2017}, publisher = {ACM}, organization = {ACM}, address = {Mountain View, CA, USA}, abstract = {
With the rapid increase of users of wearable cameras in recent years and of the amount of data they produce, there is a strong need for automatic retrieval and summarization techniques. This work addresses the problem of automatically summarizing egocentric photo streams captured through a wearable camera by taking an image retrieval perspective. After removing non-informative images by a new CNN-based filter, images are ranked by relevance to ensure semantic diversity and finally re-ranked by a novelty criterion to reduce redundancy. To assess the results, a new evaluation metric is proposed which takes into account the non-uniqueness of the solution. Experimental results applied on a database of 7,110 images from 6 different subjects and evaluated by experts gave 95.74\% of experts satisfaction and a Mean Opinion Score of 4.57 out of 5.0.
This paper presents the results of the UPC-UB-STP team in the 2015 MediaEval Retrieving Diverse Images Task. The goal of the challenge is to provide a ranked list of Flickr photos for a predefined set of queries. Our approach firstly generates a ranking of images based on a query-independent estimation of its relevance. Only top results are kept and iteratively re-ranked based on their intra-similarity to introduce diversity.
Building a visual summary from an egocentric photostream captured by a lifelogging wearable camera is of high interest for different applications (e.g. memory reinforcement). In this paper, we propose a new summarization method based on keyframes selection that uses visual features extracted by means of a convolutional neural network. Our method applies an unsupervised clustering for dividing the photostreams into events, and finally extracts the most relevant keyframe for each event. We assess the results by applying a blind-taste test on a group of 20 people who assessed the quality of the summaries.
\