% Master's thesis evaluating residual image-in-audio steganography.
% NOTE(review): the required `school` field is absent and cannot be recovered
% from this file -- add it once confirmed.
% NOTE(review): `editor` is kept as exported; it presumably lists the thesis
% advisors (confirm). Standard styles do not print it for this entry type.
% The surname Giro-i-Nieto is wrapped in braces because classic BibTeX splits
% name tokens at hyphens and would otherwise read the lowercase "i" as a
% von particle, leaving only "Nieto" as the last name.
@mastersthesis{xDomenech,
  author   = {Domenech, Teresa},
  title    = {Hiding Images in their Spoken Narratives},
  year     = {2022},
  editor   = {McGuinness, Kevin and Pons, Jordi and {Gir{\'o}-i-Nieto}, Xavier},
  abstract = {Steganography is the technique of hiding secret data within an
              ordinary, non-secret, file or message in order to avoid its
              detection. Throughout our work, we study the case where the
              hidden secret data is an image and the non-secret data or cover
              signal is an audio. To this end, we use a recently proposed
              residual architecture operating on top of short-time discrete
              cosine transform (STDCT) audio spectrograms. In our work, we
              evaluate the above-mentioned residual steganography architecture
              with the Localized Narratives dataset, explore the feasibility
              of using short-time Fourier transform (STFT) audio spectrograms
              instead of STDCTs to improve the efficiency of the system,
              investigate the use of hidden signals permuted with the
              objective to spread the audio corruption of the revealed images,
              apply averaged audio windows to improve quality results and test
              the system in real-world distortions.},
}

% Conference paper introducing the PixInWav residual steganography scheme.
% `internal-note` and `annote` are not printed by standard styles:
%   - `internal-note` preserves the raw month value from the source export,
%     which was not a valid month and conflicted with the year.
%   - `annote` preserves the web-page link captions that the export had
%     appended to the abstract (the link targets themselves were not exported).
@inproceedings{cGeleta21,
  author        = {Geleta, Margarita and Punt{\'\i}, Cristina and McGuinness, Kevin and Pons, Jordi and Canton-Ferrer, Cristian and {Gir{\'o}-i-Nieto}, Xavier},
  title         = {{PixInWav}: Residual Steganography for Hiding Pixels in Audio},
  booktitle     = {ICASSP},
  year          = {2022},
  internal-note = {Source export carried month = 06/2021, which conflicts with
                   year = 2022; the actual conference month is to be confirmed
                   before a month field is restored.},
  abstract      = {Steganography comprises the mechanics of hiding data in a
                   host media that may be publicly available. While previous
                   works focused on unimodal setups (e.g., hiding images in
                   images, or hiding audio in audio), PixInWav targets the
                   multimodal case of hiding images in audio. To this end, we
                   propose a novel residual architecture operating on top of
                   short-time discrete cosine transform (STDCT) audio
                   spectrograms. Among our results, we find that the residual
                   audio steganography setup we propose allows independent
                   encoding of the hidden image from the host audio without
                   compromising quality. Accordingly, while previous works
                   require both host and hidden signals to hide a signal,
                   PixInWav can encode images offline --- which can be later
                   hidden, in a residual fashion, into any audio signal.
                   Finally, we test our scheme in a lab setting to transmit
                   images over airwaves from a loudspeaker to a microphone
                   verifying our theoretical insights and obtaining promising
                   results.},
  annote        = {Paper, poster \& video on IEEE SigPort.
                   Full paper on arXiv, UPCommons and IEEE Xplore.
                   Source code on GitHub.
                   Tweets by @ritageleta: [1], [2], [3].
                   Tweets by @DocXavi: [1], [2], [3].
                   CVPR 2021 Women in Computer Vision Workshop.
                   Deep Learning Barcelona Symposium 2022.
                   Presentation from the early stages of the project
                   (January 2021).},
}