Skip to content

Commit

Permalink
implement GolangEnvModification feature gate (#365)
Browse files Browse the repository at this point in the history
* implement GolangEnvModification

* fix test
  • Loading branch information
sanposhiho authored Feb 26, 2024
1 parent 7442eea commit b560550
Show file tree
Hide file tree
Showing 5 changed files with 361 additions and 21 deletions.
2 changes: 1 addition & 1 deletion api/core/v1/webhook_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ var _ = BeforeSuite(func() {
factory := informers.NewSharedInformerFactory(kubeClient, defaultResyncPeriod)

controllerFetcher := controllerfetcher.NewControllerFetcher(mgr.GetConfig(), kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
podService, err := pod.New(map[string]int64{}, "0", controllerFetcher)
podService, err := pod.New(map[string]int64{}, "0", controllerFetcher, nil)
Expect(err).NotTo(HaveOccurred())

podWebhook := New(tortoiseService, podService)
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ func main() {
controllerFetcher.Start(ctx, 1*time.Second)
defer cancel()

podService, err := pod.New(config.ResourceLimitMultiplier, config.MinimumCPULimit, controllerFetcher)
podService, err := pod.New(config.ResourceLimitMultiplier, config.MinimumCPULimit, controllerFetcher, config.FeatureFlags)
if err != nil {
setupLog.Error(err, "unable to create pod service")
os.Exit(1)
Expand Down
9 changes: 8 additions & 1 deletion pkg/features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@ package features
type FeatureFlag string

const (
// Stage: alpha
// Stage: alpha (default: disabled)
// Description: Enable the feature to use the vertical scaling if the replica number reaches the preferred max replicas.
// It aims to reduce the replica number by increasing the resource requests, preventing a situation where too many overly small replicas are running.
// Tracked at https://github.com/mercari/tortoise/issues/329.
VerticalScalingBasedOnPreferredMaxReplicas FeatureFlag = "VerticalScalingBasedOnPreferredMaxReplicas"

// Stage: alpha (default: disabled)
// Description: Enable the feature to modify GOMAXPROCS/GOMEMLIMIT based on the resource requests in the Pod mutating webhook.
// Tracked at:
// - https://github.com/mercari/tortoise/issues/319
// - https://github.com/mercari/tortoise/issues/320
GolangEnvModification FeatureFlag = "GolangEnvModification"
)

func Contains(flags []FeatureFlag, flag FeatureFlag) bool {
Expand Down
83 changes: 70 additions & 13 deletions pkg/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package pod

import (
"fmt"
"math"
"strconv"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -10,26 +12,33 @@ import (
"k8s.io/utils/ptr"

"github.com/mercari/tortoise/api/v1beta3"
"github.com/mercari/tortoise/pkg/features"
"github.com/mercari/tortoise/pkg/utils"
)

type Service struct {
// For example, if it's 3 and Pod's resource request is 100m, the limit will be changed to 300m.
resourceLimitMultiplier map[string]int64
minimumCPULimit resource.Quantity
controllerFetcher controllerfetcher.ControllerFetcher
resourceLimitMultiplier map[string]int64
minimumCPULimit resource.Quantity
controllerFetcher controllerfetcher.ControllerFetcher
golangEnvModificationEnabled bool
}

func New(
resourceLimitMultiplier map[string]int64,
MinimumCPULimit string,
minimumCPULimit string,
cf controllerfetcher.ControllerFetcher,
featureFlags []features.FeatureFlag,
) (*Service, error) {
minCPULim := resource.MustParse(MinimumCPULimit)
if minimumCPULimit == "" {
minimumCPULimit = "0"
}
minCPULim := resource.MustParse(minimumCPULimit)
return &Service{
resourceLimitMultiplier: resourceLimitMultiplier,
minimumCPULimit: minCPULim,
controllerFetcher: cf,
resourceLimitMultiplier: resourceLimitMultiplier,
minimumCPULimit: minCPULim,
controllerFetcher: cf,
golangEnvModificationEnabled: features.Contains(featureFlags, features.GolangEnvModification),
}, nil
}

Expand All @@ -40,21 +49,22 @@ func (s *Service) ModifyPodResource(pod *v1.Pod, t *v1beta3.Tortoise) {
}

oldRequestsMap := map[containerNameAndResource]resource.Quantity{}
// For example, if the resource request is changed 100m → 200m, 2 will be stored.
requestChangeRatio := map[containerNameAndResource]float64{}
newRequestsMap := map[containerNameAndResource]resource.Quantity{}

// Update resource requests based on the tortoise.Status.Conditions.ContainerResourceRequests
for i, container := range pod.Spec.Containers {
for k, oldReq := range container.Resources.Requests {
oldRequestsMap[containerNameAndResource{containerName: container.Name, resourceName: k}] = oldReq

newReq, ok := utils.GetRequestFromTortoise(t, container.Name, k)
if !ok {
// Unchanged; just store the old value as the new value.
newRequestsMap[containerNameAndResource{containerName: container.Name, resourceName: k}] = oldReq
continue
newReq = oldReq
}
pod.Spec.Containers[i].Resources.Requests[k] = newReq
oldRequestsMap[containerNameAndResource{containerName: container.Name, resourceName: k}] = oldReq
newRequestsMap[containerNameAndResource{containerName: container.Name, resourceName: k}] = newReq
pod.Spec.Containers[i].Resources.Requests[k] = newReq
requestChangeRatio[containerNameAndResource{containerName: container.Name, resourceName: k}] = float64(newReq.MilliValue()) / float64(oldReq.MilliValue())
}
}

Expand Down Expand Up @@ -89,6 +99,53 @@ func (s *Service) ModifyPodResource(pod *v1.Pod, t *v1beta3.Tortoise) {
pod.Spec.Containers[i].Resources.Limits[k] = *newLim
}
}

if !s.golangEnvModificationEnabled {
return
}

// Update GOMEMLIMIT and GOMAXPROCS
for i, container := range pod.Spec.Containers {
for j, env := range container.Env {
if env.Name == "GOMAXPROCS" {
// e.g., if the CPU request is doubled, GOMAXPROCS should be doubled as well.
changeRatio, ok := requestChangeRatio[containerNameAndResource{
containerName: container.Name,
resourceName: v1.ResourceCPU,
}]
if !ok {
continue
}
oldNum, err := strconv.Atoi(env.Value)
if err != nil {
// invalid GOMAXPROCS, skip
continue
}
newUncapedNum := float64(oldNum) * changeRatio
// GOMAXPROCS should be an integer.
newNum := int(math.Ceil(newUncapedNum))
pod.Spec.Containers[i].Env[j].Value = strconv.Itoa(newNum)

}

if env.Name == "GOMEMLIMIT" {
changeRatio, ok := requestChangeRatio[containerNameAndResource{
containerName: container.Name,
resourceName: v1.ResourceMemory,
}]
if !ok {
continue
}
oldNum, err := resource.ParseQuantity(env.Value)
if err != nil {
// invalid GOMEMLIMIT, skip
continue
}
newNum := int64(float64(oldNum.MilliValue()) * changeRatio)
pod.Spec.Containers[i].Env[j].Value = resource.NewMilliQuantity(newNum, oldNum.Format).String()
}
}
}
}

func (s *Service) GetDeploymentForPod(pod *v1.Pod) (string, error) {
Expand Down
Loading

0 comments on commit b560550

Please sign in to comment.