% Conference paper from the MIG 2021 proceedings (ACM). Names are stored in
% "Last, First" form and acronyms are brace-protected so any style can recase.
@inproceedings{3dfb46f910fe43cdb68a9446b66aa7ea,
  author    = {Dubey, Rohit K. and Sohn, Samuel S. and Abualdenien, Jimmy and Thrash, Tyler and Hoelscher, Christoph and Borrmann, Andr{\'e} and Kapadia, Mubbasir},
  title     = {{SNAP}: Successor Entropy based Incremental Subgoal Discovery for Adaptive Navigation},
  booktitle = {Proceedings - {MIG} 2021: 14th {ACM} {SIGGRAPH} Conference on Motion, Interaction, and Games},
  editor    = {Spencer, Stephen N.},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2021},
  month     = nov,
  day       = {10},
  doi       = {10.1145/3487983.3488292},
  language  = {English},
  keywords  = {Adaptive path-planning, Goal-conditioned RL, Hippocampus, Option discovery, Robot navigation},
  abstract  = {Reinforcement learning (RL) has demonstrated great success in solving navigation tasks but often fails when learning complex environmental structures. One open challenge is to incorporate low-level generalizable skills with human-like adaptive path-planning in an RL framework. Motivated by neural findings in animal navigation, we propose a Successor eNtropy-based Adaptive Path-planning (SNAP) that combines a low-level goal-conditioned policy with the flexibility of a classical high-level planner. SNAP decomposes distant goal-reaching tasks into multiple nearby goal-reaching sub-tasks using a topological graph. To construct this graph, we propose an incremental subgoal discovery method that leverages the highest-entropy states in the learned Successor Representation. The Successor Representation encodes the likelihood of being in a future state given the current state and capture the relational structure of states based on a policy. Our main contributions lie in discovering subgoal states that efficiently abstract the state-space and proposing a low-level goal-conditioned controller for local navigation. Since the basic low-level skill is learned independent of state representation, our model easily generalizes to novel environments without intensive relearning. We provide empirical evidence that the proposed method enables agents to perform long-horizon sparse reward tasks quickly, take detours during barrier tasks, and exploit shortcuts that did not exist during training. Our experiments further show that the proposed method outperforms the existing goal-conditioned RL algorithms in successfully reaching distant-goal tasks and policy learning. To evaluate human-like adaptive path-planning, we also compare our optimal agent with human data and found that, on average, the agent was able to find a shorter path than the human participants.},
  note      = {Publisher Copyright: {\textcopyright} 2021 ACM.; 14th ACM SIGGRAPH Conference on Motion, Interaction, and Games, MIG 2021 ; Conference date: 10-11-2021 Through 12-11-2021},
}