From ef4f02f4b5cc72a76d47c6be9a4ee1c2c6f8027f Mon Sep 17 00:00:00 2001 From: Ali Naqvi Date: Thu, 28 Nov 2024 13:28:47 +0800 Subject: [PATCH] feat: PPT-1701 refactored recompile/reload logic (#395) --- shard.lock | 18 ++--- src/placeos-rest-api/controllers/drivers.cr | 83 +++++++++++++++------ 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/shard.lock b/shard.lock index 5a62a388..289a21e6 100644 --- a/shard.lock +++ b/shard.lock @@ -7,7 +7,7 @@ shards: action-controller: git: https://github.com/spider-gazelle/action-controller.git - version: 7.4.3 + version: 7.5.0 active-model: git: https://github.com/spider-gazelle/active-model.git @@ -15,7 +15,7 @@ shards: ameba: git: https://github.com/crystal-ameba/ameba.git - version: 1.6.3 + version: 1.6.4 any_hash: git: https://github.com/sija/any_hash.cr.git @@ -231,19 +231,19 @@ shards: placeos-core: git: https://github.com/placeos/core.git - version: 4.14.5+git.commit.3fafd33ac39abc2027f33bbbce3e9b7442907b1c + version: 4.16.0+git.commit.5fc50051ca6250930059028aeacbc8007244b23f placeos-core-client: # Overridden git: https://github.com/placeos/core-client.git - version: 1.0.6 + version: 1.1.0 placeos-driver: git: https://github.com/placeos/driver.git - version: 7.2.16 + version: 7.2.18 placeos-frontend-loader: git: https://github.com/placeos/frontend-loader.git - version: 2.7.1+git.commit.b95767d5e0f4b2ecb75ed2b652c4642395ba7fd7 + version: 2.7.1+git.commit.a8bf2b8c4489693405e7be59eb07dd8d103d14d4 placeos-log-backend: git: https://github.com/place-labs/log-backend.git @@ -267,7 +267,7 @@ shards: protobuf: git: https://github.com/jeromegn/protobuf.cr.git - version: 2.3.0 + version: 2.3.1 qr-code: git: https://github.com/spider-gazelle/qr-code.git @@ -307,7 +307,7 @@ shards: search-ingest: git: https://github.com/placeos/search-ingest.git - version: 2.11.2+git.commit.119945c8289385744f46c937691d670584411407 + version: 2.11.2+git.commit.6fc646dfeb8ef4157d35a64496d5824dbdc25659 secrets-env: # Overridden git: https://github.com/spider-gazelle/secrets-env.git @@ -323,7 +323,7 @@ shards: ssh2: git: https://github.com/spider-gazelle/ssh2.cr.git - version: 1.6.1 + version: 1.7.0 stumpy_core: git: https://github.com/stumpycr/stumpy_core.git diff --git a/src/placeos-rest-api/controllers/drivers.cr b/src/placeos-rest-api/controllers/drivers.cr index cefc9464..8590e62e 100644 --- a/src/placeos-rest-api/controllers/drivers.cr +++ b/src/placeos-rest-api/controllers/drivers.cr @@ -97,32 +97,71 @@ module PlaceOS::Api end # force recompile a driver, useful if libraries and supporting files have been updated - @[AC::Route::POST("/:id/recompile", status: { - Nil => HTTP::Status::ALREADY_REPORTED, - })] - def recompile : ::PlaceOS::Model::Driver? - if current_driver.commit.starts_with?("RECOMPILE") - nil - else - if recompiled = Drivers.recompile(current_driver) - if recompiled.destroyed? - raise Error::NotFound.new("driver was deleted") - else - recompiled - end - else - raise IO::TimeoutError.new("time exceeded waiting for driver to recompile") - end + @[AC::Route::POST("/:id/recompile")] + def recompile : String + if (repository = current_driver.repository).nil? + Log.error { {repository_id: current_driver.repository_id, message: "failed to load driver's repository"} } + raise "failed to load driver's repository" + end + + resp = self.class.driver_recompile(current_driver, repository, request_id) + + unless 200 <= resp.first <= 299 + render status: resp.first, text: resp.last end + + resp = self.class.driver_reload(current_driver, request_id) + + render status: resp.first, text: resp.last end - def self.recompile(driver : ::PlaceOS::Model::Driver) - # Set the repository commit hash to head - driver.update_fields(commit: "RECOMPILE-#{driver.commit}") + def self.driver_recompile(driver : ::PlaceOS::Model::Driver, repository : ::PlaceOS::Model::Repository, request_id : String) + Api::Systems.core_for(driver.file_name, request_id) do |core_client| + core_client.driver_recompile( + file_name: URI.encode_path(driver.file_name), + commit: driver.commit, + repository: repository.folder_name, + tag: driver.id.as(String), + ) + end + rescue e + Log.error(exception: e) { "failed to request driver recompilation from core" } + {500, e.message || "failed to request driver recompilation"} + end - # Wait until the commit hash is not head with a timeout of 90 seconds - Utils::Changefeeds.await_model_change(driver, timeout: 90.seconds) do |update| - update.destroyed? || !update.recompile_commit? + def self.driver_reload(driver : ::PlaceOS::Model::Driver, request_id : String) : Tuple(Int32, String) + cores = RemoteDriver.default_discovery.node_hash + channel = Channel(Tuple(Int32, String)).new(cores.size) + cores.each do |cid, core_uri| + ->(core_id : String, uri : URI) do + spawn do + client = PlaceOS::Core::Client.new(uri: uri, request_id: request_id) + resp = client.driver_reload(driver.id.as(String)) + channel.send(resp) + rescue error + Log.error(exception: error) { { + message: "failure to request a driver reload on core node", + core_uri: uri.to_s, + core_id: core_id, + driver: driver.id.as(String), + request_id: request_id, + } } + channel.send({500, "failed to request a driver reload on core #{uri}: error: #{error.message}"}) + end + end.call(cid, core_uri) + end + + resps = cores.map do |_, _| + channel.receive + end + + if resps.all? { |resp| 200 <= resp.first <= 299 } + {200, resps.last.last} + elsif resps.all? { |resp| resp.first >= 300 } + {422, "Unable to reload driver on all core cluster"} + else + failed = resps.reject { |resp| 200 <= resp.first <= 299 } + {417, failed.first.last} end end