From 99a2075596b1b58051a315673a726680adf4d79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 27 Nov 2024 12:40:37 +0100 Subject: [PATCH] horus: Work around race in `cover:is_compiled/1` [Why] The `cover:call/1` internal function checks if the cover server is running and starts it if it's not: Ref = erlang:monitor(process,?SERVER), receive {'DOWN', Ref, _Type, _Object, noproc} -> erlang:demonitor(Ref), {ok,_} = start(), ... However, there is no lock between the check and the start. Therefore, any concurrent use of this code may cause a `badmatch` crash on `{ok,_} = start()` because the server might have been started in parallel, in which case `start()` returns the `already_started` error. [How] To work around the problem, we catch the `badmatch` and retry once after a sleep of 100 ms. We don't try to catch any exception from that second attempt. --- src/horus.erl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/horus.erl b/src/horus.erl index f8e7794..dfcef3e 100644 --- a/src/horus.erl +++ b/src/horus.erl @@ -1998,7 +1998,7 @@ get_object_code(Module) -> %% @private do_get_object_code(Module) -> - case cover:is_compiled(Module) of + case is_cover_compiled(Module) of false -> get_object_code_from_code_server(Module); {file, Filename} -> @@ -2008,6 +2008,18 @@ do_get_object_code(Module) -> erpc:call(CoverMainNode, ?MODULE, do_get_object_code, [Module]) end. +is_cover_compiled(Module) -> + try + cover:is_compiled(Module) + catch + error:{badmatch, {error, {already_started, _}}} -> + %% The code in `cover' that checks if the cover server is started + %% seems racy. We get an exception if it thinks it has to start it + %% but it is already started. In this case, we retry the call. + timer:sleep(100), + cover:is_compiled(Module) + end. + get_object_code_from_code_server(Module) -> case code:get_object_code(Module) of {Module, Beam, Filename} ->