From 3b4d1567f837202ba0e600382626c6210eb2c544 Mon Sep 17 00:00:00 2001 From: devsjc <47188100+devsjc@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:20:05 +0000 Subject: [PATCH] fix(compose): Correct dependencies and config files (#148) --- infrastructure/docker-compose.yaml | 53 +++++++++++++------ src/dagster_dags/assets/air/cams_eu.py | 2 +- .../assets/nwp/ceda_mo_um_global.py | 2 +- .../assets/nwp/ecmwf_ens_stat_india.py | 2 +- .../assets/nwp/ecmwf_hres_ifs_india.py | 2 +- .../assets/nwp/ecmwf_hres_ifs_west_europe.py | 2 +- .../assets/nwp/noaa-gfs-global.py | 2 +- .../assets/sat/eumetsat_iodc_lrv.py | 2 +- 8 files changed, 44 insertions(+), 23 deletions(-) diff --git a/infrastructure/docker-compose.yaml b/infrastructure/docker-compose.yaml index 308b8e5..f3f58c5 100644 --- a/infrastructure/docker-compose.yaml +++ b/infrastructure/docker-compose.yaml @@ -1,9 +1,13 @@ name: dagster +# Postgres variables, used in both the postgres container for configuration, +# and in the dagster services for connecting to the postgres container. +# The variable names are the same in both thanks to the dagster.yaml +# configuration file specifying such. x-postgres-variables: &postgres-variables POSTGRES_USER: ${POSTGRES_USER:-dagster_user} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-dagster_password} - POSTGRES_DB: ${POSTGRES_DB:-dagster_db} + POSTGRES_DB: dagster_db POSTGRES_HOST: "dagster-postgres" x-dagster-configs: &dagster-configs @@ -15,8 +19,7 @@ x-dagster-configs: &dagster-configs services: # This service runs the postgres DB used by dagster for run storage, schedule storage, - # and event log storage. Depending on the hardware you run this Compose on, you may be able - # to reduce the interval and timeout in the healthcheck to speed up your `docker-compose up` times. + # and event log storage. dagster-postgres: image: postgres:16 container_name: dagster-postgres @@ -37,8 +40,9 @@ services: # run launcher to use this same image when launching runs in a new container as well. dagster-codeserver: container_name: dagster-codeserver - image: ghcr.io/openclimatefix/dagster-dags:devsjc-code-container + image: ghcr.io/openclimatefix/dagster-dags:latest restart: always + pull_policy: always environment: <<: *postgres-variables DAGSTER_CURRENT_IMAGE: "ghcr.io/openclimatefix/dagster-dags" @@ -53,6 +57,8 @@ services: container_name: dagster-webserver image: dagster/dagster-k8s:latest command: ["dagster-webserver", "-h", "0.0.0.0", "-p", "3008", "-w", "/opt/dagster/home/workspace.yaml"] + post_start: + - command: ["pip", "install", "dagster-docker"] # Required for using dagster_docker Pipes ports: - "3008:3008" environment: @@ -67,7 +73,7 @@ services: depends_on: dagster-postgres: condition: service_healthy - dagster-codeserver_local-archives: + dagster-codeserver: condition: service_started # This service runs the dagster-daemon process, which is responsible for taking runs @@ -76,6 +82,8 @@ services: container_name: dagster-daemon image: dagster/dagster-k8s:latest command: ["dagster-daemon", "run", "-w", "/opt/dagster/home/workspace.yaml"] + post_start: + - command: ["pip", "install", "dagster-docker"] # Required for using dagster_docker Pipes restart: on-failure environment: <<: *postgres-variables @@ -89,7 +97,7 @@ services: depends_on: dagster-postgres: condition: service_healthy - dagster-codeserver_local-archives: + dagster-codeserver: condition: service_started networks: @@ -118,35 +126,48 @@ configs: postgres_db: username: {"env": "POSTGRES_USER"} password: {"env": "POSTGRES_PASSWORD"} - hostname: {"env": "POSTGRES_HOST"} - db_name: {"env": "POSTGRES_DB"} + hostname: dagster-postgres + db_name: dagster_db port: 5432 local_artifact_storage: module: dagster.core.storage.root class: LocalArtifactStorage config: - base_dir: "/opt/dagster/local/" + base_dir: "/opt/dagster/local" run_coordinator: module: dagster.core.run_coordinator class: QueuedRunCoordinator config: - max_concurrent_runs: 30 + max_concurrent_runs: 15 tag_concurrency_limits: - key: "dagster/backfill" - limit: 15 + limit: 10 - key: "nwp-consumer" - limit: 1 + limit: 5 run_launcher: module: dagster_docker class: DockerRunLauncher config: - env_vars: - - POSTGRES_USER - - POSTGRES_PASSWORD - - POSTGRES_DB + network: dagster-network + env_vars: + - ENVIRONMENT=leo + - ECMWF_API_KEY= + - ECMWF_URL= + - ECMWF_EMAIL= + - EUMETSAT_CONSUMER_KEY= + - EUMETSAT_CONSUMER_SECRET= + - CEDA_FTP_USER= + - CEDA_FTP_PASS= + - HUGGINGFACE_TOKEN= + - SS_USER_ID= + - SS_API_KEY= + container_kwargs: + volumes: + - /var/run/docker.sock:/var/run/docker.sock # So jobs can launch docker pipes assets + - :/opt/dagster/local retention: schedule: diff --git a/src/dagster_dags/assets/air/cams_eu.py b/src/dagster_dags/assets/air/cams_eu.py index a01b2f0..3482efe 100644 --- a/src/dagster_dags/assets/air/cams_eu.py +++ b/src/dagster_dags/assets/air/cams_eu.py @@ -19,7 +19,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/air/cams-europe" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/air/cams-europe" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/air/cams-europe" partitions_def: dg.TimeWindowPartitionsDefinition = dg.WeeklyPartitionsDefinition( start_date="2020-02-08", diff --git a/src/dagster_dags/assets/nwp/ceda_mo_um_global.py b/src/dagster_dags/assets/nwp/ceda_mo_um_global.py index 752f4b2..e9b0040 100644 --- a/src/dagster_dags/assets/nwp/ceda_mo_um_global.py +++ b/src/dagster_dags/assets/nwp/ceda_mo_um_global.py @@ -21,7 +21,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/nwp/ceda-mo-um-global" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/nwp/ceda-mo-um-global" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/nwp/ceda-mo-um-global" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2019-01-01", diff --git a/src/dagster_dags/assets/nwp/ecmwf_ens_stat_india.py b/src/dagster_dags/assets/nwp/ecmwf_ens_stat_india.py index d5701bc..17d3ad6 100644 --- a/src/dagster_dags/assets/nwp/ecmwf_ens_stat_india.py +++ b/src/dagster_dags/assets/nwp/ecmwf_ens_stat_india.py @@ -21,7 +21,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/nwp/ecmwf-ens-stat-india" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/nwp/ecmwf-ens-stat-india" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/nwp/ecmwf-ens-stat-india" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2020-01-01", diff --git a/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_india.py b/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_india.py index 0938f9a..beb6a85 100644 --- a/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_india.py +++ b/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_india.py @@ -20,7 +20,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/nwp/ecmwf-hres-ifs-india" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/nwp/ecmwf-hres-ifs-india" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/nwp/ecmwf-hres-ifs-india" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2017-01-01", diff --git a/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_west_europe.py b/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_west_europe.py index 50e77b9..f768033 100644 --- a/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_west_europe.py +++ b/src/dagster_dags/assets/nwp/ecmwf_hres_ifs_west_europe.py @@ -20,7 +20,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/nwp/ecmwf-hres-ifs-west-europe" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/nwp/ecmwf-hres-ifs-west-europe" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/nwp/ecmwf-hres-ifs-west-europe" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2017-01-01", diff --git a/src/dagster_dags/assets/nwp/noaa-gfs-global.py b/src/dagster_dags/assets/nwp/noaa-gfs-global.py index 11d434c..9fa4429 100644 --- a/src/dagster_dags/assets/nwp/noaa-gfs-global.py +++ b/src/dagster_dags/assets/nwp/noaa-gfs-global.py @@ -21,7 +21,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/nwp/ncep-gfs-global" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/nwp/ncep-gfs-global" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/nwp/ncep-gfs-global" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2021-01-01", diff --git a/src/dagster_dags/assets/sat/eumetsat_iodc_lrv.py b/src/dagster_dags/assets/sat/eumetsat_iodc_lrv.py index ff6711e..9935e22 100644 --- a/src/dagster_dags/assets/sat/eumetsat_iodc_lrv.py +++ b/src/dagster_dags/assets/sat/eumetsat_iodc_lrv.py @@ -21,7 +21,7 @@ ARCHIVE_FOLDER = "/var/dagster-storage/sat/eumetsat-iodc-lrv" if os.getenv("ENVIRONMENT", "local") == "leo": - ARCHIVE_FOLDER = "/mnt/storage_b/sat/eumetsat-iodc-lrv" + ARCHIVE_FOLDER = "/mnt/storage_ssd_4tb/sat/eumetsat-iodc-lrv" partitions_def: dg.TimeWindowPartitionsDefinition = dg.MonthlyPartitionsDefinition( start_date="2019-01-01",