forked from kubernetes-retired/kube-aws
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Automatic recovery from permanent failures of etcd3 nodes (kubernetes…
…-retired#417) ## Features * Automatic recovery from any number of permanently failed etcd nodes (when `etcd.disasterRecovery.automated` is set to true) * Currently, an etcd member is considered to have "permanently failed" when health checks against it keep failing for longer than the threshold (10 seconds by default) ## Notable changes * Upgraded etcd to v3 * kube-apiserver uses the etcd3 API rather than the former etcd2 API, writing v3 keys, when etcd3 is chosen for the etcd cluster * Although you can stick with the v2 API + etcd v3 (kube-apiserver then writes v2 keys in etcd v3!), the current implementation of etcd snapshot/restore doesn't support etcd v2 data. That's why I made this change * etcd3 will be the default storage backend starting with k8s 1.6 anyway * Wrote a lengthy `etcdadm` script to automate etcd member health checking, saving snapshots, recovering from up to `N/2` permanently failed nodes (= replacing failed members one by one), and recovering from more than `N/2` permanently failed nodes (= bootstrapping a brand-new cluster from the latest snapshot) ## Changelog * Add etcdadm for scripting various etcd3 administration tasks to achieve high availability * Enable etcd3 paired with the `etcdadm reconfigure` service * Periodically run etcdadm (save|check) when `etcd.snapshot.automated` or `etcd.disasterRecovery.automated` is set to true, respectively * Fail-fast etcd-member when `etcdadm reconfigure` fails * When etcd3 is chosen, use the etcd v3 API for communication between etcd and apiserver; k8s state is then persisted in etcd3 data rather than etcd2 data in an etcd3 cluster (etcd3 can serve both the v2 API and the v3 API, for accessing v2 data and v3 data respectively) * Turn off automatic Container Linux updates on Etcd nodes * Support both v2 and v3 of etcd & allow switching etcd version (for now) * Fix a validation for awsNodeLabels * e2e: Add a convenient sub-command for invoking kube-aws on a specific test cluster * Start etcd health-checking only after an attempt to start etcd has been made; otherwise
health-checking does not work when etcd fails to start at all
- Loading branch information
Showing
18 changed files
with
2,333 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
/e2e/assets | ||
*~ | ||
/core/*/config/templates.go | ||
/core/*/config/files.go | ||
.idea/ | ||
.envrc | ||
coverage.txt | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// +build ignore | ||
|
||
package main | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io/ioutil" | ||
"log" | ||
"os" | ||
"strings" | ||
"text/template" | ||
"time" | ||
) | ||
|
||
// Entry pairs an input file with the Go variable name its contents are
// emitted under. main builds one Entry per VARNAME=FILENAME argument.
type Entry struct {
	// Filename is the path of the file to read; VarName is the identifier
	// used for its contents in the generated source.
	Filename, VarName string
}
|
||
// Data is the value the files.go template is executed with.
type Data struct {
	// Vars holds one element per variable to emit; the template ranges over it.
	Vars []Var
	// Now is the timestamp rendered into the generated file's header comment.
	Now time.Time
}
|
||
// Var describes one variable rendered by the template.
type Var struct {
	// Name is the identifier of the generated variable; Data is the Go source
	// text of its value (main fills it with the output of toGoByteSlice).
	Name, Data string
}
|
||
var tmpl = template.Must(template.New("files.go").Parse(`package config | ||
// This file was generated by files_gen.go. DO NOT EDIT by hand. | ||
// | ||
// Last generated at {{ .Now }}. | ||
var ( | ||
{{ range $i, $var := .Vars }} {{ $var.Name }} = _{{ $var.Name }} | ||
{{ end }} | ||
) | ||
var ( | ||
{{ range $i, $var := .Vars }} _{{ $var.Name }} = {{ $var.Data }}{{ end }} | ||
) | ||
`)) | ||
|
||
// toGoByteSlice formats sli as the source text of a Go []byte composite
// literal. Bytes are written in %#x form, ten per row; each row starts with a
// tab and every element is followed by a comma. The literal is closed by
// "\n}\n", so the result always ends with a newline.
func toGoByteSlice(sli []byte) string {
	const perRow = 10

	var out bytes.Buffer
	out.WriteString("[]byte{\n")
	for idx, val := range sli {
		col := idx % perRow

		// The first element of a row is indented; the rest are space separated.
		lead := " "
		if col == 0 {
			lead = "\t"
		}
		fmt.Fprintf(&out, "%s%#x,", lead, val)

		// Break the line once the row is full.
		if col == perRow-1 {
			out.WriteByte('\n')
		}
	}
	out.WriteString("\n}\n")
	return out.String()
}
|
||
func main() { | ||
entries := []Entry{} | ||
args := os.Args[1:] | ||
for _, arg := range args { | ||
parts := strings.Split(arg, "=") | ||
varname, filename := parts[0], parts[1] | ||
entry := Entry{ | ||
Filename: filename, | ||
VarName: varname, | ||
} | ||
entries = append(entries, entry) | ||
} | ||
|
||
vars := make([]Var, len(entries)) | ||
for i, file := range entries { | ||
data, err := ioutil.ReadFile(file.Filename) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
vars[i] = Var{file.VarName, toGoByteSlice(data)} | ||
} | ||
|
||
f, err := os.OpenFile("files.go", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
defer f.Close() | ||
data := Data{vars, time.Now().UTC()} | ||
if err := tmpl.Execute(f, data); err != nil { | ||
log.Fatal("Failed to render template:", err) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.