Skip to content

Commit

Permalink
Improve control socket creation logic.
Browse files Browse the repository at this point in the history
Attempt to check for existing, active socket before unlinking it, and
refuse to start if there is one. Unfortunately this check cannot be done
atomically.

If the socket doesn't exist or appears stale, unlink it even if we
aren't system init. This will most likely give a better experience for
users.
  • Loading branch information
davmac314 committed Aug 22, 2021
1 parent c824f05 commit cb738f8
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 18 deletions.
5 changes: 4 additions & 1 deletion doc/manpages/dinit.8.m4
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,10 @@ services.
During execution, Dinit accepts commands via a control socket which is created
by Dinit when it starts. This can be used to order that a service be started
or stopped, to determine service status, or to make certain configuration
changes. See \fBdinitctl\fR(8) for details.
changes. See \fBdinitctl\fR(8) for details. Dinit attempts to check for the
existence of an already-active socket first, and will refuse to start if one
exists. Unfortunately, this check cannot be done atomically, and should not be
relied upon generally as a means to avoid starting two instances of dinit.

Process-based services are monitored and, if the process terminates, the
service may be stopped or the process may be re-started, according to the
Expand Down
56 changes: 39 additions & 17 deletions src/dinit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ eventloop_t event_loop(dasynq::delayed_init {});
static void sigint_reboot_cb(eventloop_t &eloop) noexcept;
static void sigquit_cb(eventloop_t &eloop) noexcept;
static void sigterm_cb(eventloop_t &eloop) noexcept;
static void open_control_socket(bool report_ro_failure = true) noexcept;
static bool open_control_socket(bool report_ro_failure = true) noexcept;
static void close_control_socket() noexcept;
static void confirm_restart_boot() noexcept;
static void flush_log() noexcept;
Expand Down Expand Up @@ -447,9 +447,9 @@ int dinit_main(int argc, char **argv)
init_log(log_is_syslog);
log_flush_timer.add_timer(event_loop, dasynq::clock_type::MONOTONIC);

// Try to open control socket (may fail due to readonly filesystem)
open_control_socket(!am_system_init);
if (!am_system_init && !control_socket_open) {
// Try to open control socket (may fail due to readonly filesystem, we ignore that if we are
// system init)
if (!open_control_socket(!am_system_init)) {
flush_log();
return EXIT_FAILURE;
}
Expand Down Expand Up @@ -759,8 +759,9 @@ void rootfs_is_rw() noexcept

// Open/create the control socket, normally /dev/dinitctl, used to allow client programs to connect
// and issue service orders and shutdown commands etc. This can safely be called multiple times;
// once the socket has been successfully opened, further calls have no effect.
static void open_control_socket(bool report_ro_failure) noexcept
// once the socket has been successfully opened, further calls will check the socket file is still
// present and re-create it if not.
static bool open_control_socket(bool report_ro_failure) noexcept
{
if (control_socket_open) {
struct stat stat_buf;
Expand All @@ -781,13 +782,7 @@ static void open_control_socket(bool report_ro_failure) noexcept
struct sockaddr_un * name = static_cast<sockaddr_un *>(malloc(sockaddr_size));
if (name == nullptr) {
log(loglevel_t::ERROR, "Opening control socket: out of memory");
return;
}

if (am_system_init) {
// Unlink any stale control socket file, but only if we are system init, since otherwise
// the 'stale' file may not be stale at all:
unlink(saddrname);
return false;
}

name->sun_family = AF_UNIX;
Expand All @@ -797,16 +792,41 @@ static void open_control_socket(bool report_ro_failure) noexcept
if (sockfd == -1) {
log(loglevel_t::ERROR, "Error creating control socket: ", strerror(errno));
free(name);
return;
return false;
}

// Check if there is already an active control socket (from another instance).
// Unfortunately, there's no way to check atomically if a socket file is stale. Still, we
// will try to check, since the consequences of running a system dinit instance twice are
// potentially severe.
int connr = connect(sockfd, (struct sockaddr *) name, sockaddr_size);
if (connr != -1 || errno == EAGAIN) {
log(loglevel_t::ERROR, "Control socket is already active"
" (another instance already running?)");

close(connr);
close(sockfd);
free(name);

return false;
}

// Unlink any stale control socket file.
//
// In the worst case, this potentially removes a socket which was not active at the time
// we checked (just above) but has since become active; there's just no good API to avoid
// this (we'd have to use a file lock, on yet another file). Since that's unlikely to
// occur in practice, and because a stale socket will prevent communication with dinit (or
// prevent it starting), then we'll take the chance on unlinking here.
unlink(saddrname);

if (bind(sockfd, (struct sockaddr *) name, sockaddr_size) == -1) {
if (errno != EROFS || report_ro_failure) {
log(loglevel_t::ERROR, "Error binding control socket: ", strerror(errno));
}
close(sockfd);
free(name);
return;
return (errno != EROFS || report_ro_failure);
}

free(name);
Expand All @@ -816,13 +836,13 @@ static void open_control_socket(bool report_ro_failure) noexcept
if (chmod(saddrname, S_IRUSR | S_IWUSR) == -1) {
log(loglevel_t::ERROR, "Error setting control socket permissions: ", strerror(errno));
close(sockfd);
return;
return false;
}

if (listen(sockfd, 10) == -1) {
log(loglevel_t::ERROR, "Error listening on control socket: ", strerror(errno));
close(sockfd);
return;
return false;
}

try {
Expand All @@ -835,6 +855,8 @@ static void open_control_socket(bool report_ro_failure) noexcept
close(sockfd);
}
}

return control_socket_open;
}

static void close_control_socket() noexcept
Expand Down

0 comments on commit cb738f8

Please sign in to comment.