BibTeX
@comment{The citation key below is ASCII-only; the accented spelling of the first author's surname is kept in the author field.}
@inproceedings{Leimkuhler:2016:10.20380/GI2016.02,
  author    = {Leimk{\"u}hler, Thomas and Kellnhofer, Petr and Ritschel, Tobias and Myszkowski, Karol and Seidel, Hans-Peter},
  title     = {Perceptual Real-time {2D-to-3D} Conversion Using Cue Fusion},
  booktitle = {Proceedings of Graphics Interface 2016},
  series    = {GI 2016},
  year      = {2016},
  issn      = {0713-5424},
  isbn      = {978-0-9947868-1-4},
  location  = {Victoria, British Columbia, Canada},
  pages     = {5--12},
  numpages  = {8},
  doi       = {10.20380/GI2016.02},
  publisher = {Canadian Human-Computer Communications Society / Soci{\'e}t{\'e} canadienne du dialogue humain-machine},
}
Abstract
We propose a system to infer binocular disparity from a monocular video stream in real-time. Different from classic reconstruction of physical depth in computer vision, we compute perceptually plausible disparity, that is numerically inaccurate, but results in a very similar overall depth impression with plausible overall layout, sharp edges, fine details and agreement between luminance and disparity. We use several simple monocular cues to estimate disparity maps and confidence maps of low spatial and temporal resolution in real-time. These are complemented by spatially-varying, appearance-dependent and class-specific disparity prior maps, learned from example stereo images. Scene classification selects this prior at runtime. Fusion of prior and cues is done by means of robust MAP inference on a dense spatio-temporal conditional random field with high spatial and temporal resolution. Using normal distributions allows this in constant-time, parallel per-pixel work. We compare our approach to previous 2D-to-3D conversion systems in terms of different metrics, as well as a user study.