Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: PPT-1701 refactored recompile/reload logic #395

Merged
merged 4 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions shard.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ shards:

action-controller:
git: https://github.com/spider-gazelle/action-controller.git
version: 7.4.3
version: 7.5.0

active-model:
git: https://github.com/spider-gazelle/active-model.git
version: 4.3.2

ameba:
git: https://github.com/crystal-ameba/ameba.git
version: 1.6.3
version: 1.6.4

any_hash:
git: https://github.com/sija/any_hash.cr.git
Expand Down Expand Up @@ -231,19 +231,19 @@ shards:

placeos-core:
git: https://github.com/placeos/core.git
version: 4.14.5+git.commit.3fafd33ac39abc2027f33bbbce3e9b7442907b1c
version: 4.16.0+git.commit.5fc50051ca6250930059028aeacbc8007244b23f

placeos-core-client: # Overridden
git: https://github.com/placeos/core-client.git
version: 1.0.6
version: 1.1.0

placeos-driver:
git: https://github.com/placeos/driver.git
version: 7.2.16
version: 7.2.18

placeos-frontend-loader:
git: https://github.com/placeos/frontend-loader.git
version: 2.7.1+git.commit.b95767d5e0f4b2ecb75ed2b652c4642395ba7fd7
version: 2.7.1+git.commit.a8bf2b8c4489693405e7be59eb07dd8d103d14d4

placeos-log-backend:
git: https://github.com/place-labs/log-backend.git
Expand All @@ -267,7 +267,7 @@ shards:

protobuf:
git: https://github.com/jeromegn/protobuf.cr.git
version: 2.3.0
version: 2.3.1

qr-code:
git: https://github.com/spider-gazelle/qr-code.git
Expand Down Expand Up @@ -307,7 +307,7 @@ shards:

search-ingest:
git: https://github.com/placeos/search-ingest.git
version: 2.11.2+git.commit.119945c8289385744f46c937691d670584411407
version: 2.11.2+git.commit.6fc646dfeb8ef4157d35a64496d5824dbdc25659

secrets-env: # Overridden
git: https://github.com/spider-gazelle/secrets-env.git
Expand All @@ -323,7 +323,7 @@ shards:

ssh2:
git: https://github.com/spider-gazelle/ssh2.cr.git
version: 1.6.1
version: 1.7.0

stumpy_core:
git: https://github.com/stumpycr/stumpy_core.git
Expand Down
85 changes: 63 additions & 22 deletions src/placeos-rest-api/controllers/drivers.cr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require "wait_group"
require "./application"

module PlaceOS::Api
Expand Down Expand Up @@ -97,32 +98,72 @@ module PlaceOS::Api
end

# force recompile a driver, useful if libraries and supporting files have been updated
@[AC::Route::POST("/:id/recompile", status: {
Nil => HTTP::Status::ALREADY_REPORTED,
})]
def recompile : ::PlaceOS::Model::Driver?
if current_driver.commit.starts_with?("RECOMPILE")
nil
else
if recompiled = Drivers.recompile(current_driver)
if recompiled.destroyed?
raise Error::NotFound.new("driver was deleted")
else
recompiled
end
else
raise IO::TimeoutError.new("time exceeded waiting for driver to recompile")
end
@[AC::Route::POST("/:id/recompile")]
def recompile : String
if (repository = current_driver.repository).nil?
Log.error { {repository_id: current_driver.repository_id, message: "failed to load driver's repository"} }
raise "failed to load driver's repository"
end

resp = self.class.driver_recompile(current_driver, repository, request_id)

unless 200 <= resp.first <= 299
render status: resp.first, text: resp.last
end

resp = self.class.driver_reload(current_driver, request_id)

render status: resp.first, text: resp.last
end

# Forwards a recompile request for `driver` to core.
#
# `Api::Systems.core_for` is called with the driver's file name and the
# `request_id`; the client it yields is asked to recompile the driver using
# the URI-path-encoded file name, the driver's commit, the repository's
# folder name, and the driver id as the tag.
#
# Returns whatever the `core_for` block evaluates to. Callers in this file
# read `.first` as an HTTP status and `.last` as the response text.
# If anything raises, the error is logged and `{500, message}` is returned
# instead of propagating the exception.
def self.driver_recompile(driver : ::PlaceOS::Model::Driver, repository : ::PlaceOS::Model::Repository, request_id : String)
  begin
    driver_file = driver.file_name
    Api::Systems.core_for(driver_file, request_id) do |client|
      client.driver_recompile(
        file_name: URI.encode_path(driver_file),
        commit: driver.commit,
        repository: repository.folder_name,
        tag: driver.id.as(String),
      )
    end
  rescue error
    # Report the failure as a 500-style tuple so the caller can render it.
    Log.error(exception: error) { "failed to request driver recompilation from core" }
    fallback = "failed to request driver recompilation"
    {500, error.message || fallback}
  end
end

def self.recompile(driver : ::PlaceOS::Model::Driver)
# Set the repository commit hash to head
driver.update_fields(commit: "RECOMPILE-#{driver.commit}")
def self.driver_reload(driver : ::PlaceOS::Model::Driver, request_id : String) : Tuple(Int32, String)
cores = RemoteDriver.default_discovery.node_hash
wg = WaitGroup.new
channel = Channel(Tuple(Int32, String)).new(cores.size)
cores.each do |core_id, uri|
wg.spawn do
client = PlaceOS::Core::Client.new(uri: uri, request_id: request_id)
resp = client.driver_reload(driver.id.as(String))
channel.send(resp)
rescue error
Log.error(exception: error) { {
message: "failure to request a driver reload on core node",
core_uri: uri.to_s,
core_id: core_id,
driver: driver.id.as(String),
request_id: request_id,
} }
channel.send({500, "failed to request a driver reload on core #{uri}: error: #{error.message}"})
end
end

wg.wait

# Wait until the commit hash is not head with a timeout of 90 seconds
Utils::Changefeeds.await_model_change(driver, timeout: 90.seconds) do |update|
update.destroyed? || !update.recompile_commit?
resps = cores.map do |_, _|
channel.receive
naqvis marked this conversation as resolved.
Show resolved Hide resolved
end

if resps.all? { |resp| 200 <= resp.first <= 299 }
{200, resps.last.last}
elsif resps.all? { |resp| resp.first >= 300 }
{422, "Unable to reload driver on all core cluster"}
else
failed = resps.reject { |resp| 200 <= resp.first <= 299 }
{417, failed.first.last}
end
end

Expand Down
Loading