frontend, backend, common, rpmbuild: allow user SSH to builders
FrostyX committed Jan 26, 2024
1 parent b88b7bd commit c5d0ca5
Showing 23 changed files with 612 additions and 20 deletions.
146 changes: 146 additions & 0 deletions backend/copr_backend/background_worker_build.py
@@ -10,10 +10,17 @@
import statistics
import time
import json
import shlex

from datetime import datetime
from packaging import version

from copr_common.enums import StatusEnum
from copr_common.helpers import (
USER_SSH_DEFAULT_EXPIRATION,
USER_SSH_MAX_EXPIRATION,
USER_SSH_EXPIRATION_PATH,
)

from copr_backend.background_worker import BackendBackgroundWorker
from copr_backend.cancellable_thread import CancellableThreadTask
@@ -51,6 +58,9 @@

COMMANDS = {
"rpm_q_builder": "rpm -q copr-rpmbuild --qf \"%{VERSION}\n\"",
"echo_authorized_keys": "echo {0} >> /root/.ssh/authorized_keys",
"set_expiration": "echo -n {0} > " + USER_SSH_EXPIRATION_PATH,
"cat_expiration": "cat {0}".format(USER_SSH_EXPIRATION_PATH),
}


@@ -139,6 +149,7 @@ def __init__(self):
self.builder_livelog = os.path.join(self.builder_dir, "main.log")
self.builder_results = os.path.join(self.builder_dir, "results")
self.ssh = None
self.root_ssh = None
self.job = None
self.host = None
self.canceled = False
@@ -307,6 +318,11 @@ def _parse_results(self):
"""
Parse `results.json` and update the `self.job` object.
"""
# When user SSH is allowed, we don't download any results from the
# builder for safety reasons. Don't try to parse anything.
if self.job.ssh_public_keys:
return

path = os.path.join(self.job.results_dir, "results.json")
if not os.path.exists(path):
raise BackendError("results.json file not found in resultdir")
@@ -589,12 +605,19 @@ def _download_results(self):
"""
Retry rsync-download the results several times.
"""
filter_ = None
if self.job.ssh_public_keys:
self.log.info("Builder allowed user SSH, not downloading the "
"results for safety reasons.")
filter_ = ["+ success", "+ *.spec", "- *"]

self.log.info("Downloading results from builder")
self.ssh.rsync_download(
self.builder_results + "/",
self.job.results_dir,
logfile=self.job.rsync_log_name,
max_retries=2,
filter_=filter_,
)

def _check_build_success(self):
@@ -683,6 +706,9 @@ def _collect_built_packages(self, job):
"""
self.log.info("Listing built binary packages in %s", job.results_dir)

if self.job.ssh_public_keys:
return ""

# pylint: disable=unsubscriptable-object
assert isinstance(self.job.results, dict)

@@ -740,6 +766,123 @@ def _add_pubkey(self):
self.log.info("Added pubkey for user %s project %s into: %s",
user, project, pubkey_path)

@skipped_for_source_build
def _setup_for_user_ssh(self):
"""
Set up the builder for user SSH
https://github.com/fedora-copr/debate/tree/main/user-ssh-builders
If the builder setup for user SSH becomes more complicated than just
installing the public key, we might want to move the code to a script
within `copr-builder` and call it here or from `copr-rpmbuild`. There
is no requirement for it to be here.
"""
if not self.job.ssh_public_keys:
return
self._alloc_root_ssh_connection()
self._deploy_user_ssh()
self._set_default_expiration()

def _alloc_root_ssh_connection(self):
self.log.info("Allocating root ssh connection to builder")
self.root_ssh = SSHConnection(
user="root",
host=self.host.hostname,
config_file=self.opts.ssh.builder_config,
log=self.log,
)

def _deploy_user_ssh(self):
"""
Deploy the user's public key to the builder so that they can connect via SSH.
"""
pubkey = shlex.quote(self.job.ssh_public_keys)
cmd = COMMANDS["echo_authorized_keys"].format(pubkey)
rc, _out, _err = self.root_ssh.run_expensive(cmd)
if rc != 0:
self.log.error("Failed to deploy user SSH key for %s",
self.job.project_owner)
return
self.log.info("Deployed user SSH key for %s", self.job.project_owner)

def _set_default_expiration(self):
"""
Set the default expiration time for the builder
"""
default = self.job.started_on + USER_SSH_DEFAULT_EXPIRATION
cmd = COMMANDS["set_expiration"].format(shlex.quote(str(default)))
rc, _out, _err = self.root_ssh.run_expensive(cmd)
if rc != 0:
# This only affects the `copr-builder show` command, which will print
# an unknown remaining time. It won't prevent the backend from
# terminating the builder when it is supposed to.
self.log.error("Failed to set the default expiration time")
return
self.log.info("The expiration time was set to %s", default)

def _builder_expiration(self):
"""
Find the user preference for the builder expiration.
"""
rc, out, _err = self.root_ssh.run_expensive(
COMMANDS["cat_expiration"], subprocess_timeout=60)
if rc == 0:
try:
return datetime.fromtimestamp(float(out))
except ValueError:
pass
self.log.error("Unable to query builder expiration file")
return None

def _keep_alive_for_user_ssh(self):
"""
Wait until user releases the VM or until it expires.
"""
if not self.job.ssh_public_keys:
return

# We are calculating the limits from when the job started, but we may
# want to consider starting the watch when the job ends.
default = datetime.fromtimestamp(
self.job.started_on + USER_SSH_DEFAULT_EXPIRATION)
maxlimit = datetime.fromtimestamp(
self.job.started_on + USER_SSH_MAX_EXPIRATION)

# Highlight this portion of the log because it is the only part of
# the backend.log that is directly for the end users
self.log.info("\n\nKeeping builder alive for user SSH")
self.log.info("The owner of this build can connect using:")
self.log.info("ssh root@%s", self.host.hostname)
self.log.info("Unless you connect to the builder and prolong its "
"expiration, it will be shut-down in %s",
default.strftime("%Y-%m-%d %H:%M"))
self.log.info("After connecting, run `copr-builder help' for "
"complete instructions\n\n")

def _keep_alive():
while True:
if self.canceled:
self.log.warning("Build canceled, VM will be shut down soon")
break
expiration = self._builder_expiration() or default
if datetime.now() > expiration:
self.log.warning("VM expired, it will be shut down soon")
break
if datetime.now() > maxlimit:
msg = "VM exceeded max limit, it will be shut down soon"
self.log.warning(msg)
break
time.sleep(60)

CancellableThreadTask(
_keep_alive,
self._cancel_task_check_request,
self._cancel_running_worker,
check_period=CANCEL_CHECK_PERIOD,
).run()
if self.canceled:
raise BuildCanceled

def build(self, attempt):
"""
Attempt to build.
@@ -754,6 +897,7 @@ def build(self, attempt):
self._fill_build_info_file()
self._cancel_if_requested()
self._mark_running(attempt)
self._setup_for_user_ssh()
self._start_remote_build()
transfer_failure = CancellableThreadTask(
self._transfer_log_file,
@@ -766,6 +910,8 @@
if transfer_failure:
raise BuildRetry("SSH problems when downloading live log: {}"
.format(transfer_failure))

self._keep_alive_for_user_ssh()
self._download_results()
self._drop_host()

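The expiration mechanism above is just a timestamp file on the builder: `set_expiration` seeds it with `started_on + USER_SSH_DEFAULT_EXPIRATION`, and `_builder_expiration()` later reads it back via `datetime.fromtimestamp(float(out))`. As an illustration only (not part of this commit), a user inside the VM could prolong the session by rewriting that file with a later Unix timestamp; the path constant lives in copr_common.helpers, so the value used below is an assumption, and in practice the `copr-builder` tool mentioned in the log messages is the intended interface.

# Illustrative sketch, not part of this commit: prolong the expiration from
# inside the builder VM. The backend parses the file content with
# datetime.fromtimestamp(float(out)), so a plain Unix timestamp is enough.
import time

# Assumed value; the real constant is USER_SSH_EXPIRATION_PATH in copr_common.helpers
USER_SSH_EXPIRATION_PATH = "/run/copr-builder-expiration"

def prolong_expiration(hours=2):
    """Push the expiration `hours` into the future (the backend still enforces USER_SSH_MAX_EXPIRATION)."""
    expire_at = time.time() + hours * 3600
    with open(USER_SSH_EXPIRATION_PATH, "w") as handle:
        handle.write(str(expire_at))
    return expire_at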
6 changes: 6 additions & 0 deletions backend/copr_backend/daemons/build_dispatcher.py
@@ -8,6 +8,7 @@
ArchitectureWorkerLimit,
ArchitectureUserWorkerLimit,
BuildTagLimit,
UserSSHLimit,
RPMBuildWorkerManager,
BuildQueueTask,
)
@@ -105,6 +106,11 @@ def __init__(self, backend_opts):
name=limit_type,
))

limit = backend_opts.builds_limits["userssh"]
userssh = UserSSHLimit(limit)
self.log.info("setting %s limit to %s", "userssh", limit)
self.limits.append(userssh)

def get_frontend_tasks(self):
"""
Retrieve a list of build jobs to be done.
2 changes: 2 additions & 0 deletions backend/copr_backend/helpers.py
@@ -240,6 +240,8 @@ def _get_limits_conf(parser):
parser, "backend", "builds_max_workers_sandbox", 10, mode="int")
limits['owner'] = _get_conf(
parser, "backend", "builds_max_workers_owner", 20, mode="int")
limits['userssh'] = _get_conf(
parser, "backend", "builds_max_userssh", 2, mode="int")
return limits


3 changes: 3 additions & 0 deletions backend/copr_backend/job.py
@@ -27,6 +27,7 @@ def __init__(self, task_data, worker_opts):
- timeout: default worker timeout
"""
# pylint: disable=too-many-statements

self.timeout = worker_opts.timeout
self.frontend_base_url = worker_opts.frontend_base_url
@@ -72,6 +73,8 @@ def __init__(self, task_data, worker_opts):

self.results = None
self.appstream = None
self.allow_user_ssh = None
self.ssh_public_keys = None

# TODO: validate update data
for key, val in task_data.items():
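The two new attributes are filled from the frontend task payload by the generic `for key, val in task_data.items()` loop below. A hedged sketch of what that payload fragment might look like follows; the field names come from this diff, while the values and surrounding keys are invented. Note that `ssh_public_keys` is handled as a single string, since `_deploy_user_ssh()` passes it through `shlex.quote()` as a whole.

# Hypothetical fragment of the task_data dictionary (values invented):
task_data = {
    "project_owner": "someuser",
    "allow_user_ssh": True,
    "ssh_public_keys": "ssh-ed25519 AAAAC3Nza... someuser@example.com",
    # ... the rest of the usual build-task fields
}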
19 changes: 19 additions & 0 deletions backend/copr_backend/rpm_builds.py
@@ -150,6 +150,25 @@ def __init__(self, architecture, limit):
)


class UserSSHLimit(HashWorkerLimit):
"""
Limit the number of builders that allow user SSH
"""
def __init__(self, limit):
def hasher(x):
# We don't allow user SSH for SRPM builds; returning None will
# make this unlimited
if x.source_build:
return None

# Don't limit builds that don't allow user SSH
# pylint: disable=protected-access
if not x._task.get("allow_user_ssh"):
return None
return x.owner
super().__init__(hasher, limit, name="userssh")


class BuildTagLimit(PredicateWorkerLimit):
"""
Limit the amount of concurrently running builds per given build tag.
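For clarity, a simplified sketch of the counting semantics this relies on, assuming `HashWorkerLimit` groups concurrently running tasks by the hasher's return value and never limits tasks that hash to `None`; this is an illustration of the behavior, not the real implementation.

# Simplified illustration (assumed HashWorkerLimit semantics):
from collections import Counter

def worker_fits(new_key, running_keys, limit=2):
    """new_key/running_keys are values returned by the hasher above."""
    if new_key is None:  # SRPM builds and non-SSH builds: unlimited
        return True
    taken = Counter(k for k in running_keys if k is not None)
    return taken[new_key] < limit

# With the default limit of 2, a third user-SSH build by the same owner waits:
assert worker_fits("frostyx", ["frostyx", None, "praiskup"])
assert not worker_fits("frostyx", ["frostyx", "frostyx"])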
23 changes: 18 additions & 5 deletions backend/copr_backend/sshcmd.py
@@ -213,7 +213,7 @@ def _full_source_path(self, src):
return "{}@{}:{}".format(self.user, host, src)

def rsync_download(self, src, dest, logfile=None, max_retries=0,
subprocess_timeout=None):
subprocess_timeout=None, filter_=None):
"""
Run rsync over pre-allocated socket (by the config)
@@ -231,9 +231,9 @@ def rsync_download(self, src, dest, logfile=None, max_retries=0,
directory needs to exist.
"""
self._retry(self._rsync_download, max_retries, src, dest, logfile,
subprocess_timeout)
subprocess_timeout, filter_)

def _rsync_download(self, src, dest, logfile, subprocess_timeout):
def _rsync_download(self, src, dest, logfile, subprocess_timeout, filter_):
ssh_opts = "ssh"
if self.config_file:
ssh_opts += " -F " + self.config_file
@@ -243,8 +243,21 @@ def _rsync_download(self, src, dest, logfile, subprocess_timeout):
log_filepath = "/dev/null"
if logfile:
log_filepath = os.path.join(dest, logfile)
command = "/usr/bin/rsync -rltDvH --chmod=D755,F644 -e '{}' {} {}/ &> {}".format(
ssh_opts, full_source_path, dest, log_filepath)

command = [
"/usr/bin/rsync",
"-rltDvH",
"--chmod=D755,F644",
"-e", "'{}'".format(ssh_opts),
]
for value in filter_ or []:
command.extend(["--filter", shlex.quote(value)])
command.extend([
full_source_path,
"{}/".format(dest),
"&>", log_filepath,
])
command = " ".join(command)

self.log.info("rsyncing of %s to %s started", full_source_path, dest)
with self._popen_timeouted(command, shell=True) as cmd:
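An illustrative call of the extended `rsync_download()`, mirroring how `_download_results()` uses it when user SSH is enabled; the paths and logfile name are placeholders. The rules follow standard rsync filter semantics: the two includes are evaluated before the trailing catch-all exclude, so only the `success` flag file and spec files are transferred.

# Hedged usage sketch (paths and logfile name are hypothetical);
# `ssh` is an SSHConnection instance (self.ssh in the build worker).
ssh.rsync_download(
    "/var/lib/copr-rpmbuild/results/",         # builder_results + "/"
    "/var/lib/copr/public_html/results/foo/",  # job.results_dir
    logfile="builder-live.rsync.log",
    max_retries=2,
    filter_=["+ success", "+ *.spec", "- *"],  # include, include, exclude the rest
)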
5 changes: 4 additions & 1 deletion backend/tests/test_config_reader.py
@@ -38,7 +38,8 @@ def test_minimal_file_and_defaults(self):
opts = BackendConfigReader(self.get_minimal_config_file()).read()
assert opts.destdir == "/tmp"
assert opts.builds_limits == {'arch': {}, 'tag': {}, 'owner': 20,
'sandbox': 10, 'arch_per_owner': {}}
'sandbox': 10, 'arch_per_owner': {},
'userssh': 2}

def test_correct_build_limits(self):
opts = BackendConfigReader(
@@ -50,6 +51,7 @@ def test_correct_build_limits(self):
"builds_max_workers_owner = 5\n"
"builds_max_workers_sandbox = 3\n"
"builds_max_workers_arch_per_owner = ppc64le=11, s390x=5\n"
"builds_max_userssh = 7\n"
))).read()
assert opts.builds_limits == {
'arch': {
@@ -65,6 +67,7 @@
'ppc64le': 11,
's390x': 5,
},
'userssh': 7,
}

@pytest.mark.parametrize("broken_config", [
3 changes: 2 additions & 1 deletion backend/tests/testlib/__init__.py
@@ -178,7 +178,8 @@ def _full_source_path(self, src):
return src

def rsync_download(self, src, dest, logfile=None, max_retries=0,
subprocess_timeout=DEFAULT_SUBPROCESS_TIMEOUT):
subprocess_timeout=DEFAULT_SUBPROCESS_TIMEOUT,
filter_=None):
data = os.environ["TEST_DATA_DIRECTORY"]
trail_slash = src.endswith("/")
src = os.path.join(data, "build_results", self.resultdir)