Skip to content

Commit

Permalink
v1.1.1 - fix aws volumes and queue limits (#90)
Browse files Browse the repository at this point in the history
- fix aws volume handling by adding a separate function to handle setting up volumes
- fix potential issue with queue limitation
  • Loading branch information
ndharasz authored Jul 13, 2024
1 parent a3eeee9 commit 0570694
Show file tree
Hide file tree
Showing 15 changed files with 176 additions and 138 deletions.
1 change: 1 addition & 0 deletions numerai/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ def main():
numerai.add_command(upgrade.upgrade)
numerai.add_command(misc.copy_example)
numerai.add_command(misc.list_constants)
numerai.add_command(misc.add_volume_aws)
numerai.add_command(destroy_all.destroy_all)
numerai()
1 change: 1 addition & 0 deletions numerai/cli/destroy_all.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Destroy command for Numerai CLI"""

import click
from numerapi import base_api

Expand Down
57 changes: 47 additions & 10 deletions numerai/cli/misc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import json

import click

from numerai.cli.constants import *
from numerai.cli.util import files
from numerai.cli.util.terraform import apply_terraform

import click


@click.command()
Expand All @@ -12,13 +11,15 @@
"-e",
type=click.Choice(EXAMPLES),
default=DEFAULT_EXAMPLE,
help=f"Specify the example to copy, defaults to {DEFAULT_EXAMPLE}. " f"Options are {EXAMPLES}.",
help=f"Specify the example to copy, defaults to {DEFAULT_EXAMPLE}. "
f"Options are {EXAMPLES}.",
)
@click.option(
"--dest",
"-d",
type=str,
help=f"Destination folder to which example code is written. " f"Defaults to the name of the example.",
help=f"Destination folder to which example code is written. "
f"Defaults to the name of the example.",
)
@click.option("--verbose", "-v", is_flag=True)
def copy_example(example, dest, verbose):
Expand Down Expand Up @@ -49,14 +50,50 @@ def list_constants():
click.secho(
f" {size} -> cpus: {preset[0] / 1024}, "
f"mem: {preset[1] / 1024} GB {suffix}",
fg="green"
if size == DEFAULT_SIZE or size == DEFAULT_SIZE_GCP
else "yellow",
fg=(
"green"
if size == DEFAULT_SIZE or size == DEFAULT_SIZE_GCP
else "yellow"
),
)
click.secho("Due to GCP Cloud Run size constraints, 'mem' sizes are not allowed when using GCP.")
click.secho(
"Due to GCP Cloud Run size constraints, 'mem' sizes are not allowed when using GCP."
)
click.secho(
"For AWS, use one of these sizes, or specify your own CPU and Memory in cores and GB using --cpu and --memory options.\n"
"See https://learn.microsoft.com/en-us/azure/container-apps/containers#configuration for Azure,\n"
"or https://cloud.google.com/run/docs/configuring/services/memory-limits for GCP \n"
"to learn more info about allowed size presets for those providers."
)


@click.command()
@click.option(
    "--size",
    "-s",
    type=int,
    required=True,
    help="Specify the volume size in GB you'd like your AWS nodes to share.",
)
@click.option("--verbose", "-v", is_flag=True)
def add_volume_aws(size, verbose):
    """
    Set the volume size for AWS nodes. This volume is shared by all nodes.
    """
    click.secho("Setting volume size for AWS nodes...", fg="yellow")

    # Load the current nodes config (a mapping of node name -> node settings).
    nodes_config = files.load_or_init_nodes()

    # Only show the raw configuration when the user asked for verbose output;
    # an unconditional dump is noisy and exposes the whole config on stdout.
    if verbose:
        click.secho(f"Current nodes config: {nodes_config}")

    # Every entry in the nodes config gets the same volume size.
    for node in nodes_config:
        nodes_config[node]["volume"] = size

    # Persist the change, then copy the nodes file into the AWS provider
    # directory under CONFIG_PATH before terraform is applied.
    # NOTE(review): presumably terraform reads nodes.json from that
    # directory -- confirm against the terraform helper.
    files.store_config(NODES_PATH, nodes_config)
    files.copy_file(
        NODES_PATH,
        f"{CONFIG_PATH}/{PROVIDER_AWS}/",
        force=True,
        verbose=verbose,
    )

    click.secho(f"Applying terraform to add {size} GB volume...", fg="yellow")
    apply_terraform(nodes_config, [PROVIDER_AWS], PROVIDER_AWS, verbose=verbose)
    click.secho("Volume size updated successfully!", fg="green")
1 change: 0 additions & 1 deletion numerai/cli/node/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def tournaments_dict():
def get_models(tournament):
napi = base_api.Api(*get_numerai_keys())
models = napi.get_models(tournament)
tournaments = napi.raw_query('query { tournaments { name tournament } }')
name_prefix = tournaments_dict()[tournament]
model_dict = {}
for model_name, model_id in models.items():
Expand Down
92 changes: 11 additions & 81 deletions numerai/cli/node/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Config command for Numerai CLI"""

import json
import os
import click

from numerapi import base_api
from numerai.cli.constants import (
DEFAULT_PROVIDER,
Expand All @@ -19,15 +18,21 @@
CONFIG_PATH,
PROVIDER_GCP,
)
from numerai.cli.util.docker import terraform, check_for_dockerfile
from numerai.cli.util import docker
from numerai.cli.util.files import (
load_or_init_nodes,
store_config,
copy_example,
copy_file,
)
from numerai.cli.util.keys import get_provider_keys, get_numerai_keys, load_or_init_keys
from numerai.cli.util.keys import get_provider_keys, get_numerai_keys
from numerai.cli.util.terraform import (
apply_terraform,
create_azure_registry,
create_gcp_registry,
)

import click


@click.command()
Expand Down Expand Up @@ -95,12 +100,6 @@
help="Forces your webhook to register with Numerai. "
"Use in conjunction with options that prevent webhook auto-registering.",
)
@click.option(
"--volume",
"-v",
type=int,
help="Specify additional block storage in GB. Currently only supported in AWS.",
)
@click.pass_context
def config(
ctx,
Expand All @@ -114,7 +113,6 @@ def config(
cron,
timeout_minutes,
register_webhook,
volume,
):
"""
Uses Terraform to create a full Numerai Compute cluster in your desired provider.
Expand Down Expand Up @@ -234,7 +232,7 @@ def config(
click.secho(f'Current node config: "{node_conf}"...')

# double check there is a dockerfile in the path we are about to configure
check_for_dockerfile(nodes_config[node]["path"])
docker.check_for_dockerfile(nodes_config[node]["path"])
store_config(NODES_PATH, nodes_config)

# Added after tf directory restructure: copy nodes.json to providers' tf directory
Expand Down Expand Up @@ -284,43 +282,10 @@ def config(
docker.tag("hello-world:linux", node_conf["docker_repo"], verbose)
docker.push(node_conf["docker_repo"], verbose)
nodes_config[node] = node_conf
elif provider == "aws":
if volume is not None:
node_conf["volume"] = volume

store_config(NODES_PATH, nodes_config)

# Apply terraform for any affected provider
for affected_provider in affected_providers:
if affected_provider in PROVIDERS:
click.secho(f"Updating resources in {affected_provider}")
terraform(
"apply -auto-approve",
verbose,
affected_provider,
env_vars=load_or_init_keys(affected_provider),
inputs={"node_config_file": "nodes.json"},
)
else:
click.secho(f"provider {affected_provider} not supported", fg="red")
exit(1)
click.secho("cloud resources created successfully", fg="green")

# terraform output for node config, same for aws and azure
click.echo(f"saving node configuration to {NODES_PATH}...")

res = terraform(f"output -json {provider}_nodes", verbose, provider).decode("utf-8")
try:
nodes = json.loads(res)
except json.JSONDecodeError:
click.secho("failed to save node configuration, please retry.", fg="red")
return
for node_name, data in nodes.items():
nodes_config[node_name].update(data)

store_config(NODES_PATH, nodes_config)
if verbose:
click.secho(f"new config:\n{json.dumps(load_or_init_nodes(), indent=2)}")
apply_terraform(nodes_config, affected_providers, provider, verbose)

webhook_url = nodes_config[node]["webhook_url"]
napi = base_api.Api(*get_numerai_keys())
Expand All @@ -336,38 +301,3 @@ def config(
"Prediction Node configured successfully. " "Next: deploy and test your node",
fg="green",
)


def create_azure_registry(provider, provider_keys, verbose):
    """Provision the Azure container registry and return its details.

    Runs ``terraform init -upgrade`` for ``provider``, applies only the
    container registry and its resource group, then reads the
    ``acr_repo_details`` terraform output.

    Args:
        provider: Provider name passed to the init and output terraform calls.
        provider_keys: Passed to the apply call as ``env_vars``.
        verbose: Verbosity flag for the init and apply calls.

    Returns:
        The ``acr_repo_details`` terraform output parsed from JSON.
    """
    # Targeted apply: only the registry and its resource group are created.
    apply_registry_cmd = (
        'apply -target="azurerm_container_registry.registry[0]" -target="azurerm_resource_group.acr_rg[0]" -auto-approve '
    )

    terraform("init -upgrade", verbose, provider)
    terraform(
        apply_registry_cmd,
        verbose,
        "azure",
        env_vars=provider_keys,
        inputs={"node_config_file": "nodes.json"},
    )

    # The output call always passes True for the verbosity argument.
    raw_details = terraform("output -json acr_repo_details", True, provider)
    return json.loads(raw_details.decode("utf-8"))


def create_gcp_registry(provider, verbose):
    """Provision the GCP artifact registry and return its details.

    Runs ``terraform init -upgrade`` for ``provider``, applies the cloud
    resource manager service and then the artifact registry repository as
    two separate targeted applies, and finally reads the
    ``artifact_registry_details`` terraform output.

    Args:
        provider: Provider name passed to the init and output terraform calls.
        verbose: Verbosity flag for the init and apply calls.

    Returns:
        The ``artifact_registry_details`` terraform output parsed from JSON.
    """
    # Order matters: the resource manager service is applied before the
    # registry repository, exactly as two sequential terraform runs.
    targeted_applies = (
        'apply -target="google_project_service.cloud_resource_manager" -auto-approve ',
        'apply -target="google_artifact_registry_repository.registry[0]" -auto-approve ',
    )

    terraform("init -upgrade", verbose, provider)
    for apply_cmd in targeted_applies:
        terraform(
            apply_cmd,
            verbose,
            "gcp",
            inputs={"node_config_file": "nodes.json"},
        )

    # The output call always passes True for the verbosity argument.
    raw_details = terraform("output -json artifact_registry_details", True, provider)
    return json.loads(raw_details.decode("utf-8"))
3 changes: 2 additions & 1 deletion numerai/cli/node/destroy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Destroy command for Numerai CLI"""

import click
from numerapi import base_api

Expand Down Expand Up @@ -41,7 +42,7 @@ def destroy(ctx, preserve_node_config, verbose):
fg="red",
)
return

if not preserve_node_config:
click.secho("backing up nodes.json...")
copy_file(NODES_PATH, f"{NODES_PATH}.backup", force=True, verbose=True)
Expand Down
30 changes: 17 additions & 13 deletions numerai/cli/setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Setup command for Numerai CLI"""

import click
import logging

Expand All @@ -16,8 +17,9 @@
prompt=True,
help=f"Initialize with this providers API keys.",
)
@click.option("--skip-key-setup", "-s", is_flag=True)
@click.option("--verbose", "-v", is_flag=True)
def setup(provider, verbose):
def setup(provider, skip_key_setup, verbose):
"""
Initializes cli and provider API keys.
"""
Expand All @@ -38,20 +40,22 @@ def setup(provider, verbose):
return

# setup numerai keys
click.secho(
"Initializing numerai keys " "(press enter to keep value in brackets)...",
fg="yellow",
)
maybe_create(KEYS_PATH, protected=True)
config_numerai_keys()
if not skip_key_setup:
click.secho(
"Initializing numerai keys " "(press enter to keep value in brackets)...",
fg="yellow",
)
maybe_create(KEYS_PATH, protected=True)
config_numerai_keys()

# setup provider keys
click.secho(
f"\nInitializing {provider} keys "
f"(press enter to keep value in brackets)...",
fg="yellow",
)
config_provider_keys(provider)
if not skip_key_setup:
click.secho(
f"\nInitializing {provider} keys "
f"(press enter to keep value in brackets)...",
fg="yellow",
)
config_provider_keys(provider)

# copy tf files
click.secho("copying terraform files...")
Expand Down
Loading

0 comments on commit 0570694

Please sign in to comment.