@conference {cAssensa, title = {PathGAN: Visual Scanpath Prediction with Generative Adversarial Networks}, booktitle = {ECCV 2018 Workshop on Egocentric Perception, Interaction and Compution (EPIC)}, year = {2018}, month = {07/2018}, publisher = {Springer}, organization = {Springer}, address = {Munich, Germany}, abstract = {

We introduce PathGAN, a deep neural network for visual scanpath prediction trained on adversarial examples. A visual scanpath is defined as the sequence of fixation points over an image defined by a human observer with its gaze. PathGAN is composed of two parts, the generator and the discriminator. Both parts extract features from images using off-the-shelf networks, and train recurrent layers to generate or discriminate scanpaths accordingly. In scanpath prediction, the stochastic nature of the data makes it very difficult to generate realistic predictions using supervised learning strategies, but we adopt adversarial training as a suitable alternative. Our experiments prove how PathGAN improves the state of the art of visual scanpath prediction on the Salient360! dataset.

This work obtained the\ 2nd award in Prediction of Head-gaze Scan-paths for Images, and the 2nd award in Prediction of Eye-gaze Scan-paths for Images at the IEEE ICME 2018 Salient360! Challenge.

}, doi = {10.1007/978-3-030-11021-5_25}, url = {https://doi.org/10.1007/978-3-030-11021-5_25}, author = {Assens, Marc and McGuinness, Kevin and Xavier Gir{\'o}-i-Nieto and O{\textquoteright}Connor, N.} } @article {aAssens, title = {Scanpath and Saliency Prediction on 360 Degree Images}, journal = {Elsevier Signal Processing: Image Communication}, year = {2018}, abstract = {

We introduce deep neural networks for scanpath and saliency prediction trained on 360-degree images. The scanpath prediction model called SaltiNet is based on a temporal-aware novel representation of saliency information named the saliency volume. The first part of the network consists of a model trained to generate saliency volumes, whose parameters are fit by back-propagation using a binary cross entropy (BCE) loss over downsampled versions of the saliency volumes. Sampling strategies over these volumes are used to generate scanpaths over the 360-degree images. Our experiments show the advantages of using saliency volumes, and how they can be used for related tasks. We also show how a similar architecture achieves state-of-the-art performance for the related task of saliency map prediction. Our source code and trained models available here.

}, url = {https://www.sciencedirect.com/science/article/pii/S0923596518306209}, author = {Assens, Marc and McGuinness, Kevin and O{\textquoteright}Connor, N. and Xavier Gir{\'o}-i-Nieto} } @conference {cAssens, title = {SaltiNet: Scan-path Prediction on 360 Degree Images using Saliency Volumes}, booktitle = {ICCV Workshop on Egocentric Perception, Interaction and Computing}, year = {2017}, month = {07/2017}, publisher = {IEEE}, organization = {IEEE}, address = {Venice, Italy}, abstract = {

We introduce SaltiNet, a deep neural network for scanpath prediction trained on 360-degree images. The first part of the network consists of a model trained to generate saliency volumes, whose parameters are learned by back-propagation computed from a binary cross entropy (BCE) loss over downsampled versions of the saliency volumes. Sampling strategies over these volumes are used to generate scanpaths over the 360-degree images. Our experiments show the advantages of using saliency volumes, and how they can be used for related tasks.

Winner of three awards at the Salient 360 Challenge at IEEE ICME 2017 (Hong Kong): Best Scan Path, Best Student Scan-path and Audience Award.

SaltiNet: The Temporal Dimension of Visual Attention Models from Xavier Giro-i-Nieto

}, isbn = {978-1-5386-1034-3}, doi = {10.1109/ICCVW.2017.275}, url = {http://ieeexplore.ieee.org/document/8265485/}, author = {Assens, Marc and McGuinness, Kevin and Xavier Gir{\'o}-i-Nieto and O{\textquoteright}Connor, N.} } @mastersthesis {xAssens, title = {The Temporal Dimension of Visual Attention Models}, year = {2017}, abstract = {

Program: Bachelor Degree on Telecommunications Science and Technologies (CITTEL)

Grade: A with honours (10.0/10.0)

This thesis explores methodologies for scanpath prediction on images using deep learning\ frameworks.\ As a preliminary step, we analyze the characteristics of the data provided by dierent datasets.\ We then explore the use of Convolutional Neural Networks (CNN) and Long-Short-Term-Memory\ (LSTM) newtworks for scanpath prediction. We observe that these models fail due to the high\ stochastic nature of the data.\ With the gained insight, we propose a novel time-aware visual saliency representation named\ Saliency Volume, that averages scanpaths over multiple observers.\ Next, we explore the SalNet network and adapt it for saliency volume prediction, and we find\ several ways of generating scanpaths from saliency volumes.\ Finally, we ne-tuned our model for scanpaht prediction on 360-degree images and successfully\ submitted it to the Salient360! Challenge from ICME. The source code and models are publicly\ available at https://github.com/massens/saliency-360salient-2017.

The Temporal Dimension of Visual Attention Models from Xavier Giro-i-Nieto

}, author = {Assens, Marc}, editor = {McGuinness, Kevin and Xavier Gir{\'o}-i-Nieto and Noel E. O{\textquoteright}Connor} }