Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raft with kind #178

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
1,552 changes: 822 additions & 730 deletions .github/workflows/tests.yaml

Large diffs are not rendered by default.

31 changes: 26 additions & 5 deletions ann_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,45 @@ set -e

dataset=${DATASET:-"sift-128-euclidean"}
distance=${DISTANCE:-"l2-squared"}
setup=${SETUP:-"docker"}

# wait_weaviate polls localhost:8080 once per second, for at most 120
# attempts, until a request succeeds.
#
# Returns 0 as soon as Weaviate answers. If every attempt fails, it prints an
# error and terminates the whole script with exit status 1.
function wait_weaviate() {
  echo "Wait for Weaviate to be ready"
  for _ in {1..120}; do
    if curl -sf -o /dev/null localhost:8080; then
      echo "Weaviate is ready"
      # Return instead of break: leaving the loop would fall through to the
      # timeout error below and abort the script even though Weaviate is up.
      return 0
    fi

    echo "Weaviate is not ready, trying again in 1s"
    sleep 1
  done
  echo "ERROR: Weaviate is not ready after 120s"
  exit 1
}

echo "Building all required containers"
( cd apps/ann-benchmarks/ && docker build -t ann_benchmarks . )

echo "Starting Weaviate..."
# Start Weaviate with the backend selected by $setup. Exactly one backend is
# started; an unrecognized value aborts the script.
if [ "$setup" == "docker" ]
then
  echo "Using docker setup"
  docker compose -f apps/weaviate-no-restart-on-crash/docker-compose.yml up -d
elif [ "$setup" == "k8s" ]
then
  echo "Using k8s setup"
  apps/weaviate/local-k8s.sh setup
else
  echo "Unknown setup"
  exit 1
fi

wait_weaviate

echo "Run benchmark script"
mkdir -p datasets
(
(
cd datasets;
if [ -f ${dataset}.hdf5 ]
then
Expand All @@ -44,8 +58,15 @@ docker run --network host -t -v "$PWD/results:/workdir/results" -v "$PWD/dataset

echo "Initial run complete, now restart Weaviate"

# Restart Weaviate through the same backend it was started with.
if [ "$setup" == "docker" ]
then
  docker compose -f apps/weaviate-no-restart-on-crash/docker-compose.yml stop weaviate
  docker compose -f apps/weaviate-no-restart-on-crash/docker-compose.yml start weaviate
elif [ "$setup" == "k8s" ]
then
  # Check REPLICAS before scaling down, so a missing value cannot leave the
  # statefulset stuck at zero replicas.
  : "${REPLICAS:?REPLICAS must be set when SETUP=k8s}"
  kubectl scale --replicas=0 statefulset/weaviate -n weaviate
  kubectl scale --replicas="${REPLICAS}" statefulset/weaviate -n weaviate
fi

wait_weaviate
echo "Weaviate ready, wait 30s for caches to be hot"
Expand Down
1 change: 1 addition & 0 deletions ann_benchmark_aws.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ echo "instance ready: $instance_id"

# cleanup tears down the AWS resources identified by $instance_id, $key_id
# and $group_id in $region: it terminates the instance, blocks until the
# instance reports as terminated, then deletes the key pair and the security
# group.
cleanup() {
  aws ec2 terminate-instances --region "$region" --instance-ids "$instance_id" | jq
  # Wait before removing dependent resources (presumably the security group
  # cannot be deleted while the instance still uses it; confirm).
  aws ec2 wait instance-terminated --region "$region" --instance-ids "$instance_id"
  aws ec2 delete-key-pair --region "$region" --key-name "$key_id" | jq
  aws ec2 delete-security-group --region "$region" --group-id "$group_id" | jq
}
Expand Down
4 changes: 3 additions & 1 deletion ann_benchmark_compression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ function wait_weaviate() {
for _ in {1..120}; do
if curl -sf -o /dev/null localhost:8080; then
echo "Weaviate is ready"
break
return 0
fi

echo "Weaviate is not ready, trying again in 1s"
sleep 1
done
echo "ERROR: Weaviate is not ready after 120s"
exit 1
}

echo "Building all required containers"
Expand Down
1 change: 1 addition & 0 deletions ann_benchmark_compression_aws.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ echo "instance ready: $instance_id"

# cleanup tears down the AWS resources identified by $instance_id, $key_id
# and $group_id in $region: it terminates the instance, blocks until the
# instance reports as terminated, then deletes the key pair and the security
# group.
cleanup() {
  aws ec2 terminate-instances --region "$region" --instance-ids "$instance_id" | jq
  # Wait before removing dependent resources (presumably the security group
  # cannot be deleted while the instance still uses it; confirm).
  aws ec2 wait instance-terminated --region "$region" --instance-ids "$instance_id"
  aws ec2 delete-key-pair --region "$region" --key-name "$key_id" | jq
  aws ec2 delete-security-group --region "$region" --group-id "$group_id" | jq
}
Expand Down
23 changes: 19 additions & 4 deletions apps/replicated_import_with_backup/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ services:
BACKUP_S3_BUCKET: 'weaviate-backups'
BACKUP_S3_USE_SSL: 'false'
AWS_ACCESS_KEY_ID: 'aws_access_key'
AWS_SECRET_KEY: 'aws_secret_key'
AWS_SECRET_KEY: 'aws_secret_key'
depends_on:
- backup-s3
- backup-s3

weaviate-node-2: # same as extended service to avoid reconfiguring cluster comm
extends:
Expand All @@ -26,7 +26,22 @@ services:
BACKUP_S3_BUCKET: 'weaviate-backups'
BACKUP_S3_USE_SSL: 'false'
AWS_ACCESS_KEY_ID: 'aws_access_key'
AWS_SECRET_KEY: 'aws_secret_key'
AWS_SECRET_KEY: 'aws_secret_key'
depends_on:
- backup-s3

weaviate-node-3: # same as extended service to avoid reconfiguring cluster comm
extends:
file: ./../weaviate/docker-compose-replication.yml
service: weaviate-node-3
container_name: weaviate-node-3
environment:
ENABLE_MODULES: 'backup-s3'
BACKUP_S3_ENDPOINT: 'backup-s3:9000'
BACKUP_S3_BUCKET: 'weaviate-backups'
BACKUP_S3_USE_SSL: 'false'
AWS_ACCESS_KEY_ID: 'aws_access_key'
AWS_SECRET_KEY: 'aws_secret_key'
depends_on:
- backup-s3

Expand Down Expand Up @@ -66,7 +81,7 @@ services:
/usr/bin/mc alias set chaos http://backup-s3:9000 aws_access_key aws_secret_key;
/usr/bin/mc rb --force chaos/weaviate-backups;
exit 0;
"
"

importer-schema-node-1:
build: ./../replicated-import/
Expand Down
4 changes: 4 additions & 0 deletions apps/upgrade-journey/containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ func (c *cluster) startWeaviateNode(ctx context.Context, nodeId int, version str
Logger: log.Default(),
ContainerRequest: testcontainers.ContainerRequest{
Name: fmt.Sprintf("%s-%d", c.hostname(nodeId), counter),
Hostname: c.hostname(nodeId),
Image: image,
Cmd: []string{"--host", "0.0.0.0", "--port", "8080", "--scheme", "http"},
Networks: []string{c.networkName},
Expand All @@ -122,6 +123,9 @@ func (c *cluster) startWeaviateNode(ctx context.Context, nodeId int, version str
"CLUSTER_DATA_BIND_PORT": "7101",
"CLUSTER_HOSTNAME": c.hostname(nodeId),
"CLUSTER_JOIN": c.allNodes(),
"RAFT_JOIN": c.hostname(0),
"RAFT_BOOTSTRAP_EXPECT": "1",
"RAFT_RECOVERY_TIMEOUT": "10",
"PERSISTENCE_LSM_ACCESS_STRATEGY": os.Getenv("PERSISTENCE_LSM_ACCESS_STRATEGY"),
},
Mounts: testcontainers.Mounts(testcontainers.BindMount(
Expand Down
20 changes: 16 additions & 4 deletions apps/upgrade-journey/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"math/rand"
"os"
"strconv"
"time"

"github.com/google/uuid"
Expand All @@ -32,14 +33,25 @@ func main() {
log.Fatal("missing MINIMUM_WEAVIATE_VERSION")
}

nodes, ok := os.LookupEnv("NUM_NODES")
if !ok {
log.Fatal("missing NUM_NODES")
}

var err error
numNodes, err := strconv.Atoi(nodes)
if err != nil {
log.Fatal(err)
}

versions, err = buildVersionList(ctx, minimumW, targetW)
if err != nil {
log.Fatal(err)
}

log.Printf("configured minimum version is %s", minimumW)
log.Printf("configured target version is %s", targetW)
log.Printf("number of nodes is %d", numNodes)
log.Printf("identified the following versions: %v", versions)

cfg := weaviate.Config{
Expand All @@ -48,16 +60,16 @@ func main() {
}
client := weaviate.New(cfg)

err = do(ctx, client)
err = do(ctx, client, numNodes)
if err != nil {
log.Fatal(err)
}
}

func do(ctx context.Context, client *weaviate.Client) error {
func do(ctx context.Context, client *weaviate.Client, numNodes int) error {
rand.Seed(time.Now().UnixNano())

c := newCluster(3)
c := newCluster(numNodes)

if err := c.startNetwork(ctx); err != nil {
return err
Expand Down Expand Up @@ -451,7 +463,7 @@ func importSourceObject(ctx context.Context, client *weaviate.Client,
version, targetID string,
) error {
var major, minor, patch int64
semver, ok := maybeParseSingleSemverWithoutLeadingVForImport(version)
semver, ok := maybeParseSingleSemverWithoutLeadingV(version)
if ok {
major, minor, patch = semver.major(), semver.minor(), semver.patch()
}
Expand Down
22 changes: 9 additions & 13 deletions apps/upgrade-journey/versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,15 @@ func parseSingleSemverWithoutLeadingV(input string) semver {
return v
}

func maybeParseSingleSemverWithoutLeadingVForImport(input string) (semver, bool) {
ver, ok := maybeParseSingleSemverWithoutLeadingV(input)
if !ok {
// let's return a dummy version bc here we got a preview image
ver, err := hashicorpversion.NewSemver("0.0.0")
if err != nil {
panic("cannot parse 0.0.0 dummy version")
}
return semver{version: ver}, true
func maybeParseSingleSemverWithoutLeadingV(input string) (semver, bool) {
r := regexp.MustCompile(`^([0-9]+)\.([0-9]+)\.([0-9]+)$`)
if !r.MatchString(input) {
return semver{}, false
}
return ver, true
}

func maybeParseSingleSemverWithoutLeadingV(input string) (semver, bool) {
ver, err := hashicorpversion.NewSemver(input)
if err != nil {
return semver{version: nil}, false
panic(fmt.Errorf("cannot parse version %q: %w", input, err))
}

return semver{
Expand All @@ -184,6 +176,7 @@ func (s semverList) toStringList() []string {
return out
}

// getTargetVersion spawns a Weaviate container to read the version it reports,
// because the version cannot be inferred from the container image itself.
func getTargetVersion(ctx context.Context, version string) (string, error) {
weaviateImage := fmt.Sprintf("semitechnologies/weaviate:%s", version)
env := map[string]string{
Expand All @@ -192,6 +185,9 @@ func getTargetVersion(ctx context.Context, version string) (string, error) {
"QUERY_DEFAULTS_LIMIT": "20",
"PERSISTENCE_DATA_PATH": "./data",
"DEFAULT_VECTORIZER_MODULE": "none",
"CLUSTER_HOSTNAME": "weaviate-test",
"RAFT_JOIN": "weaviate-test",
"RAFT_BOOTSTRAP_EXPECT": "1",
}
req := testcontainers.ContainerRequest{
Image: weaviateImage,
Expand Down
10 changes: 7 additions & 3 deletions apps/weaviate/docker-compose-backup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ services:
DEFAULT_VECTORIZER_MODULE: 'none'
ENABLE_MODULES: 'backup-s3'
BACKUP_FILESYSTEM_PATH: '/var/lib/backups'
CLUSTER_HOSTNAME: 'node1'
CLUSTER_HOSTNAME: 'node1'
CLUSTER_GOSSIP_BIND_PORT: '7100'
CLUSTER_DATA_BIND_PORT: '7101'
RAFT_BOOTSTRAP_EXPECT: 1
RAFT_JOIN: "node1"
BACKUP_S3_ENDPOINT: 'backup-s3:9000'
BACKUP_S3_BUCKET: 'weaviate-backups'
AWS_ACCESS_KEY_ID: 'aws_access_key'
Expand Down Expand Up @@ -58,7 +60,7 @@ services:
DEFAULT_VECTORIZER_MODULE: 'none'
ENABLE_MODULES: 'backup-s3'
BACKUP_FILESYSTEM_PATH: '/var/lib/backups'
CLUSTER_HOSTNAME: 'node1'
CLUSTER_HOSTNAME: 'node1'
BACKUP_S3_ENDPOINT: 'backup-s3:9000'
BACKUP_S3_BUCKET: 'weaviate-backups'
AWS_ACCESS_KEY_ID: 'aws_access_key'
Expand Down Expand Up @@ -91,10 +93,12 @@ services:
DEFAULT_VECTORIZER_MODULE: 'none'
ENABLE_MODULES: 'backup-s3'
BACKUP_FILESYSTEM_PATH: '/var/lib/backups'
CLUSTER_HOSTNAME: 'node2'
CLUSTER_HOSTNAME: 'node2'
CLUSTER_GOSSIP_BIND_PORT: '7102'
CLUSTER_DATA_BIND_PORT: '7103'
CLUSTER_JOIN: 'weaviate-node-1:7100'
RAFT_JOIN: 'node1'
# Raft settings carry the RAFT_ prefix, matching RAFT_BOOTSTRAP_EXPECT on the
# other services; a bare BOOTSTRAP_EXPECT is a different variable name.
RAFT_BOOTSTRAP_EXPECT: 1
BACKUP_S3_ENDPOINT: 'backup-s3:9000'
BACKUP_S3_BUCKET: 'weaviate-backups'
AWS_ACCESS_KEY_ID: 'aws_access_key'
Expand Down
12 changes: 9 additions & 3 deletions apps/weaviate/docker-compose-replication.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ services:
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
DEFAULT_VECTORIZER_MODULE: 'none'
CLUSTER_HOSTNAME: 'node1'
CLUSTER_HOSTNAME: 'node1'
CLUSTER_GOSSIP_BIND_PORT: '7100'
CLUSTER_DATA_BIND_PORT: '7101'
RAFT_JOIN: 'node1,node2,node3'
RAFT_BOOTSTRAP_EXPECT: 3
PERSISTENCE_LSM_ACCESS_STRATEGY: '${PERSISTENCE_LSM_ACCESS_STRATEGY}'
DISABLE_TELEMETRY: 'true'

Expand All @@ -51,10 +53,12 @@ services:
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
DEFAULT_VECTORIZER_MODULE: 'none'
CLUSTER_HOSTNAME: 'node2'
CLUSTER_HOSTNAME: 'node2'
CLUSTER_GOSSIP_BIND_PORT: '7102'
CLUSTER_DATA_BIND_PORT: '7103'
CLUSTER_JOIN: 'weaviate-node-1:7100'
RAFT_JOIN: 'node1,node2,node3'
RAFT_BOOTSTRAP_EXPECT: 3
PERSISTENCE_LSM_ACCESS_STRATEGY: '${PERSISTENCE_LSM_ACCESS_STRATEGY}'
DISABLE_TELEMETRY: 'true'

Expand All @@ -80,9 +84,11 @@ services:
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
DEFAULT_VECTORIZER_MODULE: 'none'
CLUSTER_HOSTNAME: 'node3'
CLUSTER_HOSTNAME: 'node3'
CLUSTER_GOSSIP_BIND_PORT: '7104'
CLUSTER_DATA_BIND_PORT: '7105'
CLUSTER_JOIN: 'weaviate-node-1:7100'
RAFT_JOIN: 'node1,node2,node3'
RAFT_BOOTSTRAP_EXPECT: 3
PERSISTENCE_LSM_ACCESS_STRATEGY: '${PERSISTENCE_LSM_ACCESS_STRATEGY}'
DISABLE_TELEMETRY: 'true'
Loading
Loading