diff --git a/devtools/datasette/fly/50-mod-http-realip.conf b/devtools/datasette/fly/50-mod-http-realip.conf
new file mode 100644
index 0000000000..02eb895838
--- /dev/null
+++ b/devtools/datasette/fly/50-mod-http-realip.conf
@@ -0,0 +1 @@
+load_module modules/ngx_http_realip_module.so;
diff --git a/devtools/datasette/fly/nginx.conf b/devtools/datasette/fly/nginx.conf
new file mode 100644
index 0000000000..ca2c983a1a
--- /dev/null
+++ b/devtools/datasette/fly/nginx.conf
@@ -0,0 +1,17 @@
+daemon off;
+
+events {
+    worker_connections 1024;
+}
+http {
+    server {
+        listen 8080;
+        location / {
+            proxy_pass http://127.0.0.1:8081/;
+            proxy_set_header Host $host;
+            set_real_ip_from 0.0.0.0/0;
+            real_ip_header X-Forwarded-For;
+            real_ip_recursive on;
+        }
+    }
+}
diff --git a/devtools/datasette/fly/run.sh b/devtools/datasette/fly/run.sh
index 426ae11bd2..0c201518a3 100755
--- a/devtools/datasette/fly/run.sh
+++ b/devtools/datasette/fly/run.sh
@@ -8,4 +8,5 @@ ls
 mv all_dbs.tar.zst /data
 zstd -f -d /data/all_dbs.tar.zst -o /data/all_dbs.tar
 tar -xf /data/all_dbs.tar --directory /data
-datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $PORT
+datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $DATASETTE_PORT > /dev/null &
+nginx -c nginx.conf
diff --git a/devtools/datasette/publish.py b/devtools/datasette/publish.py
index ef6805506a..01071549ee 100644
--- a/devtools/datasette/publish.py
+++ b/devtools/datasette/publish.py
@@ -35,18 +35,29 @@
 DOCKERFILE_TEMPLATE = """
 FROM python:3.11.0-slim-bullseye
 
-COPY . /app
-WORKDIR /app
-RUN apt-get update
-RUN apt-get install -y zstd
-
-ENV DATASETTE_SECRET '{datasette_secret}'
 ENV DATABASES '{databases}'
+ENV DATASETTE_PORT 8081
+ENV NGINX_PORT 8080
+
+RUN apt-get update
+RUN apt-get install -y zstd nginx
 
 RUN pip install -U datasette datasette-cluster-map datasette-vega datasette-block-robots
 
-ENV PORT 8080
-EXPOSE 8080
+# set up nginx + enable real IP module
+COPY nginx.conf /usr/share/nginx/nginx.conf
+COPY 50-mod-http-realip.conf /etc/nginx/modules-enabled/
+
+# the two symlinks allow nginx logs to get written out to stdout/stderr
+RUN mkdir /data \
+    && ln -sf /dev/stdout /var/log/nginx/access.log \
+    && ln -sf /dev/stderr /var/log/nginx/error.log
+
+COPY . /app
+WORKDIR /app
+
+EXPOSE ${{NGINX_PORT}}
+ENV DATASETTE_SECRET '{datasette_secret}'
 
 CMD ["./run.sh"]
 """
@@ -106,7 +117,8 @@ def inspect_data(datasets: list[str], pudl_output: Path) -> str:
     "-l",
     "deploy",
     flag_value="local",
-    help="Deploy Datasette locally for testing or debugging purposes.",
+    help="Deploy Datasette locally for testing or debugging purposes. Note that "
+    "you have to stop the Docker container manually to terminate this server.",
 )
 @click.option(
     "--metadata",
@@ -138,11 +150,9 @@ def deploy_datasette(
     python publish.py --fly -- --build-only
 
     """
+    logging.info(f"Deploying to {deploy.upper()}...")
     pudl_output = PudlPaths().pudl_output
-    pudl_output = PudlPaths().pudl_output
 
-    metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml()
-
     all_databases = (
         ["pudl.sqlite"]
         + sorted(str(p.name) for p in pudl_output.glob("ferc*.sqlite"))
@@ -149,11 +159,39 @@ def deploy_datasette(
     )
 
     databases = list(only_databases if only_databases else all_databases)
 
-    # Make sure we have the expected metadata for databases
-    # headed to deployment.
- if deploy != "metadata": - check_tables_have_metadata(metadata_yml, databases) + fly_dir = Path(__file__).parent.absolute() / "fly" + docker_path = fly_dir / "Dockerfile" + inspect_path = fly_dir / "inspect-data.json" + metadata_path = fly_dir / "metadata.yml" + metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml() + + logging.info(f"Writing Datasette metadata to: {metadata_path}") + with metadata_path.open("w") as f: + f.write(metadata_yml) + check_tables_have_metadata(metadata_yml, databases) + + if deploy == "metadata": + logging.info("Only writing metadata. Aborting now.") + return 0 + + logging.info(f"Inspecting DBs for datasette: {databases}...") + inspect_output = inspect_data(databases, pudl_output) + with inspect_path.open("w") as f: + f.write(json.dumps(inspect_output)) + + logging.info("Writing Dockerfile...") + with docker_path.open("w") as f: + f.write(make_dockerfile(databases)) + + logging.info(f"Compressing {databases} and putting into docker context...") + check_call( # noqa: S603 + ["tar", "-a", "-czvf", fly_dir / "all_dbs.tar.zst"] + databases, + cwd=pudl_output, + ) + + # OK, now we have a Dockerfile + the right context. Time to run the dang + # container somehwere. if deploy in {"production", "staging"}: fly_dir = Path(__file__).parent.absolute() / "fly" logging.info(f"Deploying {deploy} to fly.io...") @@ -189,21 +227,13 @@ def deploy_datasette( elif deploy == "local": logging.info("Running Datasette locally...") - metadata_path = pudl_output / "metadata.yml" - logging.info(f"Writing Datasette metadata to: {metadata_path}") - with metadata_path.open("w") as f: - f.write(metadata_yml) - check_call( # noqa: S603 - ["/usr/bin/env", "datasette", "serve", "-m", "metadata.yml"] + databases, - cwd=pudl_output, + ["/usr/bin/env", "docker", "build", "-t", "pudl_datasette:local", "."], + cwd=fly_dir, + ) + check_call( # noqa: S603 + ["/usr/bin/env", "docker", "run", "-p", "8080:8080", "pudl_datasette:local"] ) - - elif deploy == "metadata": - metadata_path = Path.cwd() / "metadata.yml" - logging.info(f"Writing Datasette metadata to: {metadata_path}") - with metadata_path.open("w") as f: - f.write(metadata_yml) else: logging.error(f"Unrecognized deployment destination: {deploy=}") diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 1b0a532a58..4ef2d4b0fd 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -2218,7 +2218,7 @@ def from_data_source_ids( xbrl_resources=xbrl_resources, ) - def to_yaml(self) -> None: + def to_yaml(self) -> str: """Output database, table, and column metadata to YAML file.""" template = _get_jinja_environment().get_template("datasette-metadata.yml.jinja")