@inbook{bWaibel10,
  title     = {Computers in the Human Interaction Loop},
  booktitle = {Handbook on Ambient Intelligence and Smart Environments (AISE)},
  year      = {2010},
  pages     = {1071--1116},
  publisher = {Springer},
  address   = {Boston, MA},
  abstract  = {

It is a common experience in our modern world for us humans to be overwhelmed by the complexities of the technological artifacts around us and by the attention they demand. While technology provides wonderful support and helpful assistance, it also causes an increased preoccupation with technology itself and a related fragmentation of attention. As humans, however, we would rather attend to a meaningful dialog and interaction with other humans than control the operations of the machines that serve us. Such complexity and distraction are a natural consequence of the flexibility and the choice of functions and features that technology has to offer. Thus the flexibility of choice and the availability of desirable functions are in conflict with ease of use and with our very ability to enjoy their benefits.

},
  isbn      = {978-0-387-93807-3},
  doi       = {10.1007/978-0-387-93808-0_40},
  author    = {Waibel, A. and Stiefelhagen, R. and Carlson, R. and Casas, J. and Kleindienst, J. and Lamel, L. and Lanz, O. and Mostefa, D. and Omologo, M. and Pianesi, F. and Polymenakos, L. and Potamianos, G. and Soldatos, J. and Sutschet, G. and Terken, J.}
}

@article{aMostefa07,
  title     = {The CHIL Audiovisual Corpus for Lecture and Meeting Analysis inside Smart Rooms},
  journal   = {Language Resources and Evaluation},
  volume    = {41},
  number    = {3},
  year      = {2007},
  month     = {01/2008},
  pages     = {389--407},
  abstract  = {

The analysis of lectures and meetings inside smart rooms has recently attracted much interest in the literature, being the focus of international projects and technology evaluations. A key enabler for progress in this area is the availability of appropriate multimodal and multi-sensory corpora, annotated with rich human activity information during lectures and meetings. This paper is devoted to exactly such a corpus, developed in the framework of the European project CHIL, {\textquotedblleft}Computers in the Human Interaction Loop{\textquotedblright}. The resulting data set has the potential to drastically advance the state of the art by providing numerous synchronized audio and video streams of real lectures and meetings, captured in multiple recording sites over the past four years. In particular, it overcomes typical shortcomings of other existing databases that may contain limited sensory or monomodal data, exhibit constrained human behavior and interaction patterns, or lack data variability. The CHIL corpus is accompanied by rich manual annotations of both its audio and visual modalities. These provide a detailed multi-channel verbatim orthographic transcription that includes speaker turns and identities, acoustic condition information, and named entities, as well as video labels in multiple camera views that provide multi-person 3D head and 2D facial feature location information. Over the past three years, the corpus has been crucial to the evaluation of a multitude of audiovisual perception technologies for human activity analysis in lecture and meeting scenarios, demonstrating its utility during internal evaluations of the CHIL consortium, as well as at the recent international CLEAR and Rich Transcription evaluations. The CHIL corpus is publicly available to the research community.

},
  issn      = {1574-020X},
  doi       = {10.1007/s10579-007-9054-4},
  author    = {Mostefa, D. and Moreau, N. and Choukri, K. and Potamianos, G. and Chu, S. and Tyagi, A. and Casas, J. and Turmo, J. and Cristoforetti, L. and Tobia, F. and Pnevmatikakis, A. and Mylonakis, V. and Talantzis, F. and Burger, S. and Stiefelhagen, R. and Bernardin, K. and Rochet, C.}
}