This repository has been archived by the owner on Sep 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcontroller.go
311 lines (274 loc) · 10.3 KB
/
controller.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
package main
import (
"fmt"
"regexp"
"time"
"github.com/ovotech/iam-service-account-controller/pkg/iam"
iamerrors "github.com/ovotech/iam-service-account-controller/pkg/iam/errors"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"
)
// Annotation keys read from ServiceAccount metadata.
const (
	// managedAnnotationKey must carry the value "true" for a ServiceAccount to
	// be considered by this controller.
	managedAnnotationKey = "security.kaluza.com/iam-role-managed"
	// roleAnnotationKey holds the IAM role ARN declared on the ServiceAccount.
	roleAnnotationKey = "eks.amazonaws.com/role-arn"
)

// Reasons attached to the events recorded against a ServiceAccount.
const (
	SyncSuccess = "Synced"
	SyncFailed  = "SyncFailed"
	SyncWarning = "SyncWarning"
)

// Messages attached to the events recorded against a ServiceAccount.
const (
	MessageResourceSynced = "Successfully synced AWS IAM role"
	// MessageRoleCreationFailed is a format string; %s receives the error text.
	MessageRoleCreationFailed = "Failed to create AWS IAM role due to: %s"
	MessageUnmanagedRole      = "AWS IAM role exists but is not managed by controller"
	MessageMisconfiguredARN   = "ServiceAccount is managed but ARN doesn't match spec"
)
// Controller watches ServiceAccounts and keeps a matching AWS IAM role in
// place for each one that opts in through its annotations.
type Controller struct {
// kubeclientset is the Kubernetes API client handed to NewController.
kubeclientset kubernetes.Interface
// serviceAccountsLister reads ServiceAccounts from the informer cache.
serviceAccountsLister corelisters.ServiceAccountLister
// serviceAccountsSynced reports whether the ServiceAccount informer cache
// has completed its initial sync; Run waits on it before starting workers.
serviceAccountsSynced cache.InformerSynced
// workqueue holds namespace/name keys awaiting processing, with rate-limited
// retries for keys whose sync failed.
workqueue workqueue.RateLimitingInterface
// recorder emits Kubernetes events against the ServiceAccounts being synced.
recorder record.EventRecorder
// iam looks up, creates and deletes the IAM roles and builds expected ARNs.
iam *iam.Manager
}
// NewController wires together a Controller: it creates an event recorder that
// publishes through kubeclientset, builds the rate-limited "ServiceAccounts"
// workqueue, and registers add/update/delete handlers on the given
// ServiceAccount informer so that every notification is passed to
// enqueueServiceAccount.
func NewController(
	kubeclientset kubernetes.Interface,
	serviceAccountInformer coreinformers.ServiceAccountInformer,
	iamManager *iam.Manager,
) *Controller {
	klog.Info("Creating event broadcaster")
	broadcaster := record.NewBroadcaster()
	broadcaster.StartStructuredLogging(0)

	// An empty namespace lets the sink write events in any namespace.
	sink := &typedcorev1.EventSinkImpl{Interface: kubeclientset.CoreV1().Events("")}
	broadcaster.StartRecordingToSink(sink)

	source := corev1.EventSource{Component: controllerName}
	eventRecorder := broadcaster.NewRecorder(scheme.Scheme, source)

	c := &Controller{
		kubeclientset:         kubeclientset,
		serviceAccountsLister: serviceAccountInformer.Lister(),
		serviceAccountsSynced: serviceAccountInformer.Informer().HasSynced,
		workqueue: workqueue.NewNamedRateLimitingQueue(
			workqueue.DefaultControllerRateLimiter(),
			"ServiceAccounts",
		),
		recorder: eventRecorder,
		iam:      iamManager,
	}

	klog.Info("Setting up event handlers")
	handlers := cache.ResourceEventHandlerFuncs{
		AddFunc: c.enqueueServiceAccount,
		// Only the updated object matters; the previous state is ignored.
		UpdateFunc: func(_, updated interface{}) {
			c.enqueueServiceAccount(updated)
		},
		DeleteFunc: c.enqueueServiceAccount,
	}
	serviceAccountInformer.Informer().AddEventHandler(handlers)

	return c
}
// Run waits for the ServiceAccount informer cache to sync, launches
// threadiness worker goroutines and then blocks until stopCh is closed.
// On return the workqueue is shut down, which causes the workers to stop
// picking up new items.
//
// It returns an error only when the cache sync wait is abandoned (for
// example because stopCh was closed first).
func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) error {
	defer utilruntime.HandleCrash()
	defer c.workqueue.ShutDown()

	klog.Info("Starting ServiceAccount controller")

	// Workers read through the lister, so the cache must be populated first.
	klog.Info("Waiting for informer caches to sync")
	if !cache.WaitForCacheSync(stopCh, c.serviceAccountsSynced) {
		return fmt.Errorf("failed to wait for caches to sync")
	}

	klog.Info("Starting workers")
	// Each worker is restarted one second after it exits, until stopCh closes.
	for remaining := threadiness; remaining > 0; remaining-- {
		go wait.Until(c.runWorker, time.Second, stopCh)
	}
	klog.Info("Started workers")

	<-stopCh
	klog.Info("Shutting down workers")
	return nil
}
// runWorker drains the workqueue one item at a time and returns once
// processNextWorkItem reports that the queue has been shut down.
func (c *Controller) runWorker() {
	for {
		if !c.processNextWorkItem() {
			return
		}
	}
}
// processNextWorkItem takes one item off the workqueue and hands it to
// syncHandler. It returns false only when the workqueue has been shut down;
// in every other case (including sync failures) it returns true so the worker
// keeps going.
func (c *Controller) processNextWorkItem() bool {
	item, quit := c.workqueue.Get()
	if quit {
		return false
	}

	// The work is wrapped in a closure so that Done is deferred per item and
	// runs before any resulting error is reported.
	process := func() error {
		// Done tells the workqueue that processing of this item has finished.
		defer c.workqueue.Done(item)

		// Keys are namespace/name strings rather than objects, so the sync
		// always reads the freshest state from the informer cache.
		key, isString := item.(string)
		if !isString {
			// An invalid item can never succeed; Forget it so that it is not
			// retried forever.
			c.workqueue.Forget(item)
			utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", item))
			return nil
		}

		syncErr := c.syncHandler(key)
		if syncErr != nil {
			// Possibly transient: put the key back with back-off and do not
			// Forget it, so its retry counter keeps growing.
			c.workqueue.AddRateLimited(key)
			return fmt.Errorf("error syncing '%s': %s, requeuing", key, syncErr.Error())
		}

		// Success: reset the rate-limiter history for this key.
		c.workqueue.Forget(item)
		klog.Infof("Successfully synced '%s'", key)
		return nil
	}

	if err := process(); err != nil {
		utilruntime.HandleError(err)
	}
	return true
}
// syncHandler reconciles the IAM role for the ServiceAccount identified by the
// given namespace/name key:
//
//   - ServiceAccount gone from the cache: its IAM role is deleted.
//   - role exists: an event records whether the controller manages it.
//   - role missing: it is created, and the outcome is recorded as an event.
//
// A non-nil return value makes the caller requeue the key. Malformed keys and
// keys containing unexpected characters return nil because retrying them can
// never succeed.
func (c *Controller) syncHandler(serviceAccountKey string) error {
	klog.Infof("Syncing %s\n", serviceAccountKey)

	// Break the key into its namespace and name parts.
	namespace, name, err := cache.SplitMetaNamespaceKey(serviceAccountKey)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Invalid resource key: %s", serviceAccountKey))
		return nil
	}

	// Both parts originate outside our trust boundary and end up in sensitive
	// places such as access policies, so anything with unexpected characters
	// is dropped here without a requeue.
	if inputOK := isValidUserInput(namespace) && isValidUserInput(name); !inputOK {
		klog.Infof(
			"ServiceAccount key '%s' contains unexpected user input",
			serviceAccountKey,
		)
		return nil
	}

	sa, err := c.serviceAccountsLister.ServiceAccounts(namespace).Get(name)
	if err != nil {
		if !k8serrors.IsNotFound(err) {
			// Unexpected lookup failure: requeue and try again.
			return err
		}
		// The ServiceAccount has been deleted from the cluster, so its IAM
		// role has to go as well.
		klog.Infof(
			"ServiceAccount '%s' no longer exists, will delete its IAM Role",
			serviceAccountKey,
		)
		if deleteErr := c.iam.DeleteRole(name, namespace); deleteErr != nil {
			return deleteErr
		}
		return nil
	}

	role, err := c.iam.GetRole(name, namespace)
	if err == nil {
		// The role is already there; report whether it is ours.
		if c.iam.IsManaged(role) {
			c.recorder.Event(sa, corev1.EventTypeNormal, SyncSuccess, MessageResourceSynced)
			return nil
		}
		c.recorder.Event(sa, corev1.EventTypeWarning, SyncWarning, MessageUnmanagedRole)
		return nil
	}
	if !iamerrors.IsNotFound(err) {
		// Anything other than "role not found" cannot be handled now: requeue.
		return err
	}

	// No role yet, so create one.
	klog.Infof("No IAM Role for '%s'; creating it", serviceAccountKey)
	if createErr := c.iam.CreateRole(name, namespace); createErr != nil {
		// Surface the failure on the ServiceAccount and requeue.
		failure := fmt.Sprintf(MessageRoleCreationFailed, createErr.Error())
		c.recorder.Event(sa, corev1.EventTypeWarning, SyncFailed, failure)
		return createErr
	}
	c.recorder.Event(sa, corev1.EventTypeNormal, SyncSuccess, MessageResourceSynced)
	return nil
}
// enqueueServiceAccount is the informer event handler. It takes the object
// from an add, update or delete notification and, if the ServiceAccount opts
// in to this controller, puts its namespace/name key on the workqueue.
//
// obj is normally a *corev1.ServiceAccount. For delete notifications the
// informer may instead deliver a cache.DeletedFinalStateUnknown tombstone
// wrapping the last known object; that case is unwrapped here. Any other type
// is reported through utilruntime.HandleError and ignored rather than
// panicking inside the informer callback.
//
// A ServiceAccount is enqueued only when:
//   - its managed annotation is exactly "true", and
//   - its role-arn annotation is present and equals the ARN the IAM manager
//     derives from the ServiceAccount's name and namespace.
//
// If the role-arn annotation is present but does not match, a warning is
// logged and recorded as an event, and the notification is dropped.
func (c *Controller) enqueueServiceAccount(obj interface{}) {
	sa, ok := obj.(*corev1.ServiceAccount)
	if !ok {
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			utilruntime.HandleError(fmt.Errorf("expected ServiceAccount but got %#v", obj))
			return
		}
		sa, ok = tombstone.Obj.(*corev1.ServiceAccount)
		if !ok {
			utilruntime.HandleError(
				fmt.Errorf("tombstone contained unexpected object %#v", tombstone.Obj),
			)
			return
		}
	}

	// Skip ServiceAccounts that have not opted in. A missing annotation reads
	// as the empty string and is therefore skipped too.
	if sa.ObjectMeta.Annotations[managedAnnotationKey] != "true" {
		return
	}

	// Only ServiceAccounts carrying an annotation of the form
	//   eks.amazonaws.com/role-arn: arn:aws:iam::<ACCOUNT_ID>:role/<IAM_ROLE_NAME>
	// are handled.
	roleARN, hasRoleARN := sa.ObjectMeta.Annotations[roleAnnotationKey]
	if !hasRoleARN {
		return
	}

	// The role name must follow the controller's naming convention; when the
	// annotated ARN differs from the expected one we warn and ignore the event.
	if roleARN != c.iam.MakeRoleARN(sa.ObjectMeta.Name, sa.ObjectMeta.Namespace) {
		klog.Infof(
			"ServiceAccount '%s/%s' wants to be managed by controller but ARN doesn't match spec",
			sa.ObjectMeta.Namespace,
			sa.ObjectMeta.Name,
		)
		c.recorder.Event(sa, corev1.EventTypeWarning, SyncWarning, MessageMisconfiguredARN)
		return
	}

	// Key the unwrapped ServiceAccount (not obj) so tombstones work as well.
	key, err := cache.MetaNamespaceKeyFunc(sa)
	if err != nil {
		utilruntime.HandleError(err)
		return
	}
	c.workqueue.Add(key)
}
// validUserInputPattern matches non-empty strings consisting solely of
// lowercase ASCII letters, digits and hyphens. It is compiled once at package
// level instead of on every call.
var validUserInputPattern = regexp.MustCompile(`^[0-9a-z-]+$`)

// isValidUserInput reports whether input is acceptable from a security point
// of view: non-empty and made up only of the characters a-z, 0-9 and '-'.
// Everything else (upper case, dots, slashes, whitespace, ...) is rejected.
func isValidUserInput(input string) bool {
	return validUserInputPattern.MatchString(input)
}