% Master's thesis by Juan Jos{\'e} Nieto (2019) on video saliency prediction.
% The citation key is kept as exported so existing \cite commands still resolve.
%
% NOTE(review): the required `school` field for a mastersthesis entry is missing.
% The export carried this slide caption inside the abstract:
%   "Video Saliency Prediction with Deep Neural Networks - Juan Jose Nieto -
%    DCU 2019 from Universitat Polit{\`e}cnica de Catalunya"
% It hints at the institution but does not establish it. TODO confirm and add
% `school`.
% NOTE(review): the people listed under `editor` are presumably the thesis
% advisors; standard styles do not print `editor` for a thesis. Verify.
% NOTE(review): the abstract ends with "publicly available here" and the export
% also held a bare "Source code" label; both were hyperlinks whose targets were
% lost. TODO add a `url` field once the repository address is known.
@mastersthesis{xNieto,
  author   = {Nieto, Juan Jos{\'e}},
  title    = {Video Saliency Prediction with Deep Neural Networks},
  year     = {2019},
  editor   = {Mohedano, Eva and McGuinness, Kevin and Gir{\'o}-i-Nieto, Xavier},
  abstract = {Saliency prediction is a topic undergoing intense study in computer vision with a broad range of applications. It consists in predicting where the attention is going to be received in an image or a video by a human. Our work is based on a deep neural network named SalGAN, which was trained on a saliency annotated dataset of static images. In this thesis we investigate different approaches for extending SalGAN to the video domain. To this end, we investigate the recently proposed saliency annotated video dataset DHF1K to train and evaluate our models. The obtained results indicate that techniques such as depth estimation or coordconv can effectively be used as additional modalities to enhance the saliency prediction of static images obtained with SalGAN, achieving encouraging results in the DHF1K benchmark. Our work is based on pytorch and it is publicly available here.},
}