% Conference paper: Leurent and Maillard, "Practical Open-Loop Optimistic Planning".
% Field notes:
%   - address is the publisher's city; the conference location is stored in the
%     separate venue field (ignored by classic BibTeX styles, used by biblatex).
%   - month uses the predefined sep macro, prefixed with the day range, so the
%     month name is expanded by the bibliography style.
%   - Non-ASCII letters are written as LaTeX escapes for classic BibTeX.
@inproceedings{Leurent2020practical,
	author    = {Leurent, Edouard and Maillard, Odalric-Ambrym},
	title     = {Practical Open-Loop Optimistic Planning},
	editor    = {Brefeld, Ulf and Fromont, Elisa and Hotho, Andreas and Knobbe, Arno and Maathuis, Marloes and Robardet, C{\'e}line},
	booktitle = {European Conference on Machine Learning and Knowledge Discovery in Databases},
	publisher = {Springer International Publishing},
	address   = {Cham},
	venue     = {W{\"u}rzburg, Germany},
	year      = {2020},
	month     = "16--20~" # sep,
	pages     = {69--85},
	isbn      = {978-3-030-46133-1},
	abstract  = {We consider the problem of online planning in a Markov Decision Process when given only access to a generative model, restricted to open-loop policies - i.e. sequences of actions - and under budget constraint. In this setting, the Open-Loop Optimistic Planning (OLOP) algorithm enjoys good theoretical guarantees but is overly conservative in practice, as we show in numerical experiments. We propose a modified version of the algorithm with tighter upper-confidence bounds, KL-OLOP, that leads to better practical performances while retaining the sample complexity bound. Finally, we propose an efficient implementation that significantly improves the time complexity of both algorithms.},
}