@inproceedings{be4d975ff9f047379d0343a7d9fcc77f,
  author    = {Feng, Mingtao and Li, Zhen and Li, Qi and Zhang, Liang and Zhang, {Xiang Dong} and Zhu, Guangming and Zhang, Hui and Wang, Yaonan and Mian, Ajmal},
  title     = {Free-form Description Guided {3D} Visual Graph Network for Object Grounding in Point Cloud},
  booktitle = {Proceedings - 2021 {IEEE/CVF} International Conference on Computer Vision, {ICCV} 2021},
  series    = {Proceedings of the {IEEE} International Conference on Computer Vision},
  publisher = {IEEE, Institute of Electrical and Electronics Engineers},
  address   = {United States},
  year      = {2021},
  month     = oct,
  pages     = {3702--3711},
  doi       = {10.1109/ICCV48922.2021.00370},
  language  = {English},
  note      = {18th IEEE/CVF International Conference on Computer Vision, ICCV 2021; conference date: 11--17 October 2021},
  abstract  = {3D object grounding aims to locate the most relevant target object in a raw point cloud scene based on a free-form language description. Understanding complex and diverse descriptions, and lifting them directly to a point cloud is a new and challenging topic due to the irregular and sparse nature of point clouds. There are three main challenges in 3D object grounding: to find the main focus in the complex and diverse description; to understand the point cloud scene; and to locate the target object. In this paper, we address all three challenges. Firstly, we propose a language scene graph module to capture the rich structure and long-distance phrase correlations. Secondly, we introduce a multi-level 3D proposal relation graph module to extract the object-object and object-scene co-occurrence relationships, and strengthen the visual features of the initial proposals. Lastly, we develop a description guided 3D visual graph module to encode global contexts of phrases and proposals by a nodes matching strategy. Extensive experiments on challenging benchmark datasets (ScanRefer [3] and Nr3D [42]) show that our algorithm outperforms existing state-of-the-art. Our code is available at https://github.com/PNXD/FFL-3DOG.},
}