Skip to content

Commit

Permalink
chore: not all requests use slots
Browse files Browse the repository at this point in the history
  • Loading branch information
mcharytoniuk committed May 13, 2024
1 parent 7bec92d commit 62a68b4
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 5 deletions.
12 changes: 10 additions & 2 deletions loadbalancer/LoadBalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ type LoadBalancer struct {
Logger hclog.Logger
}

func (self *LoadBalancer) Balance(request *http.Request) (*url.URL, error) {
headTarget := self.LoadBalancerTargetCollection.GetForBalancing()
func (self *LoadBalancer) Balance(request *LoadBalancerRequest) (*url.URL, error) {
headTarget := self.GetLlamaCppTargetForRequest(request)

if headTarget == nil {
return nil, ErrorNoTargetsAvailable
Expand All @@ -42,6 +42,14 @@ func (self *LoadBalancer) Balance(request *http.Request) (*url.URL, error) {
return targetUrl, nil
}

// GetLlamaCppTargetForRequest chooses the target that should serve request.
//
// Requests for which IsSlottable reports true are routed through
// GetForBalancingSlot; every other request simply receives the collection's
// current head via GetHead. The result is whatever the chosen collection
// method returns, which may be nil.
func (self *LoadBalancer) GetLlamaCppTargetForRequest(request *LoadBalancerRequest) *LlamaCppTarget {
	if !request.IsSlottable() {
		// Not a slot-consuming request: just peek at the head target.
		return self.LoadBalancerTargetCollection.GetHead()
	}

	return self.LoadBalancerTargetCollection.GetForBalancingSlot()
}

func (self *LoadBalancer) GetStatus() *LoadBalancerStatus {
return &LoadBalancerStatus{
RegisteredTargets: self.LoadBalancerTargetCollection.Len(),
Expand Down
11 changes: 11 additions & 0 deletions loadbalancer/LoadBalancerRequest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package loadbalancer

import "net/http"

// LoadBalancerRequest wraps an HTTP request so that it can be classified
// before a balancing decision is made.
type LoadBalancerRequest struct {
	// HttpRequest is the wrapped request; it may be nil on a zero-value wrapper.
	HttpRequest *http.Request
}

// IsSlottable reports whether the wrapped request is a POST to the
// "/completion" path.
//
// It returns false (instead of panicking) when the receiver, the wrapped
// request, or the request's URL is nil.
func (self *LoadBalancerRequest) IsSlottable() bool {
	if self == nil || self.HttpRequest == nil || self.HttpRequest.URL == nil {
		return false
	}

	return self.HttpRequest.Method == http.MethodPost &&
		self.HttpRequest.URL.Path == "/completion"
}
13 changes: 11 additions & 2 deletions loadbalancer/LoadBalancerTargetCollection.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,24 @@ func (self *LoadBalancerTargetCollection) HasTargetConfiguration(
return ok
}

func (self *LoadBalancerTargetCollection) GetForBalancing() *LlamaCppTarget {
// GetHead returns the element stored at index 0 of the target heap, or nil
// when the heap holds no elements. The heap itself is left untouched.
//
// NOTE(review): this read does not acquire self.mutex — confirm that callers
// do not race with code that mutates the heap.
func (self *LoadBalancerTargetCollection) GetHead() *LlamaCppTarget {
	if self.targetHeap.Len() > 0 {
		return (*self.targetHeap)[0]
	}

	return nil
}

func (self *LoadBalancerTargetCollection) GetForBalancingSlot() *LlamaCppTarget {
headTarget := self.GetHead()

if headTarget == nil {
return nil
}

self.mutex.Lock()
defer self.mutex.Unlock()

headTarget := (*self.targetHeap)[0]
headTarget.LlamaCppHealthStatus.SlotsIdle -= 1
heap.Fix(self.targetHeap, 0)

Expand Down
4 changes: 3 additions & 1 deletion loadbalancer/ReverseProxyServer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ func (self *ReverseProxyServer) Serve(serverEventsChannel chan goroutine.ResultM
InferLevels: true,
}),
Rewrite: func(request *httputil.ProxyRequest) {
targetUrl, err := self.LoadBalancer.Balance(request.In)
targetUrl, err := self.LoadBalancer.Balance(&LoadBalancerRequest{
HttpRequest: request.In,
})

if err != nil {
serverEventsChannel <- goroutine.ResultMessage{
Expand Down

0 comments on commit 62a68b4

Please sign in to comment.