% PRISM workshop paper (ECCV 2024 Workshops, LNCS). The citation key is the
% exporter-generated id and is kept unchanged so existing \cite commands still resolve.
@inproceedings{3abbb735f7da49bd932e0a00156e3123,
  author    = {Jahoda, Pavel and Yeganeh, Yousef and Adeli, Ehsan and Navab, Nassir and Farshad, Azade},
  title     = {{PRISM}: Progressive Restoration for Scene Graph-Based Image Manipulation},
  booktitle = {Computer Vision -- {ECCV} 2024 Workshops, Proceedings},
  editor    = {Del Bue, Alessio and Canton, Cristian and Pont-Tuset, Jordi and Tommasi, Tatiana},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {142--160},
  year      = {2025},
  doi       = {10.1007/978-3-031-91838-4_9},
  isbn      = {9783031918377},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; Workshops that were held in conjunction with the 18th European Conference on Computer Vision, ECCV 2024 ; Conference date: 29-09-2024 Through 04-10-2024},
  abstract  = {Scene graphs have emerged as accurate semantic descriptions for image generation and manipulation tasks; however, their complexity and diversity of the shapes and relations of objects in data make it challenging to incorporate them into the models and generate high-quality results. To address these challenges, we propose PRISM, a novel progressive multi-head image manipulation approach to improve the accuracy of the manipulation of target regions in the scene. Our image manipulation framework is trained using an end-to-end denoising masked reconstruction proxy task, where the masked regions are progressively unmasked from the outer regions to the inner part. We take advantage of the outer part of the masked area as they have a direct correlation with the context of the scene. Moreover, our multi-head architecture simultaneously generates detailed object-specific regions in addition to the entire image to produce higher-quality images. Our model is evaluated against methods in the semantic image manipulation task on the CLEVR and Visual Genome datasets. Our results demonstrate the potential of our approach for enhancing the quality and precision of scene graph-based image manipulation.},
}