Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log datasette access IPs #3669

Merged
merged 5 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions devtools/datasette/fly/50-mod-http-realip.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
load_module modules/ngx_http_realip_module.so;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could technically also live in nginx.conf but this is more idiomatic.

17 changes: 17 additions & 0 deletions devtools/datasette/fly/nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
daemon off;

events {
worker_connections 1024;
}
http {
server {
listen 8080;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory we could turn this (and the Dockerfile) into a jinja template, which might be cleaner, but it felt like gilding the lily to me.

location / {
proxy_pass http://127.0.0.1:8081/;
proxy_set_header Host $host;
set_real_ip_from 0.0.0.0/0;
real_ip_header X-Forwarded-For;
real_ip_recursive on;
}
}
}
3 changes: 2 additions & 1 deletion devtools/datasette/fly/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ ls
mv all_dbs.tar.zst /data
zstd -f -d /data/all_dbs.tar.zst -o /data/all_dbs.tar
tar -xf /data/all_dbs.tar --directory /data
datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $PORT
datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $DATASETTE_PORT > /dev/null &
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to suppress the datasette serve logs since we get the same information in the nginx access logs.

nginx -c nginx.conf
88 changes: 59 additions & 29 deletions devtools/datasette/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,29 @@

DOCKERFILE_TEMPLATE = """
FROM python:3.11.0-slim-bullseye
COPY . /app
WORKDIR /app

RUN apt-get update
RUN apt-get install -y zstd

ENV DATASETTE_SECRET '{datasette_secret}'
ENV DATABASES '{databases}'
ENV DATASETTE_PORT 8081
ENV NGINX_PORT 8080

RUN apt-get update
RUN apt-get install -y zstd nginx
RUN pip install -U datasette datasette-cluster-map datasette-vega datasette-block-robots
ENV PORT 8080
EXPOSE 8080

# set up nginx + enable real IP module
COPY nginx.conf /usr/share/nginx/nginx.conf
COPY 50-mod-http-realip.conf /etc/nginx/modules-enabled/

# the two symlinks allow nginx logs to get written out to stdout/stderr
RUN mkdir /data \
&& ln -sf /dev/stdout /var/log/nginx/access.log \
&& ln -sf /dev/stderr /var/log/nginx/error.log

COPY . /app
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this down to the bottom since all the other setup changes much less frequently than the contents of the Docker context - this helps with build caching.

WORKDIR /app

EXPOSE ${{NGINX_PORT}}
ENV DATASETTE_SECRET '{datasette_secret}'
CMD ["./run.sh"]
"""

Expand Down Expand Up @@ -106,7 +117,8 @@ def inspect_data(datasets: list[str], pudl_output: Path) -> str:
"-l",
"deploy",
flag_value="local",
help="Deploy Datasette locally for testing or debugging purposes.",
        help="Deploy Datasette locally for testing or debugging purposes. Note that "
"you have to stop the docker instance manually to terminate this server.",
)
@click.option(
"--metadata",
Expand Down Expand Up @@ -138,22 +150,48 @@ def deploy_datasette(

python publish.py --fly -- --build-only
"""
logging.info(f"Deploying to {deploy.upper()}...")
pudl_output = PudlPaths().pudl_output

pudl_output = PudlPaths().pudl_output
metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml()

all_databases = (
["pudl.sqlite"]
+ sorted(str(p.name) for p in pudl_output.glob("ferc*.sqlite"))
+ ["censusdp1tract.sqlite"]
)
databases = list(only_databases if only_databases else all_databases)

# Make sure we have the expected metadata for databases
# headed to deployment.
if deploy != "metadata":
check_tables_have_metadata(metadata_yml, databases)
fly_dir = Path(__file__).parent.absolute() / "fly"
docker_path = fly_dir / "Dockerfile"
inspect_path = fly_dir / "inspect-data.json"
metadata_path = fly_dir / "metadata.yml"
metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml()

logging.info(f"Writing Datasette metadata to: {metadata_path}")
with metadata_path.open("w") as f:
f.write(metadata_yml)
check_tables_have_metadata(metadata_yml, databases)

if deploy == "metadata":
logging.info("Only writing metadata. Aborting now.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You maybe forgot to abort the script here?

Copy link
Member Author

@jdangerx jdangerx Jun 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure did, fixing! Good catch :) Fortunately we only actually deploy if we actively choose a valid deployment target 😅

return 0

logging.info(f"Inspecting DBs for datasette: {databases}...")
inspect_output = inspect_data(databases, pudl_output)
with inspect_path.open("w") as f:
f.write(json.dumps(inspect_output))

logging.info("Writing Dockerfile...")
with docker_path.open("w") as f:
f.write(make_dockerfile(databases))

logging.info(f"Compressing {databases} and putting into docker context...")
check_call( # noqa: S603
["tar", "-a", "-czvf", fly_dir / "all_dbs.tar.zst"] + databases,
cwd=pudl_output,
)

# OK, now we have a Dockerfile + the right context. Time to run the dang
    # container somewhere.
if deploy in {"production", "staging"}:
fly_dir = Path(__file__).parent.absolute() / "fly"
logging.info(f"Deploying {deploy} to fly.io...")
Expand Down Expand Up @@ -189,21 +227,13 @@ def deploy_datasette(

elif deploy == "local":
logging.info("Running Datasette locally...")
metadata_path = pudl_output / "metadata.yml"
logging.info(f"Writing Datasette metadata to: {metadata_path}")
with metadata_path.open("w") as f:
f.write(metadata_yml)

check_call( # noqa: S603
["/usr/bin/env", "datasette", "serve", "-m", "metadata.yml"] + databases,
cwd=pudl_output,
["/usr/bin/env", "docker", "build", "-t", "pudl_datasette:local", "."],
cwd=fly_dir,
)
check_call( # noqa: S603
["/usr/bin/env", "docker", "run", "-p", "8080:8080", "pudl_datasette:local"]
)

elif deploy == "metadata":
metadata_path = Path.cwd() / "metadata.yml"
logging.info(f"Writing Datasette metadata to: {metadata_path}")
with metadata_path.open("w") as f:
f.write(metadata_yml)

else:
logging.error(f"Unrecognized deployment destination: {deploy=}")
Expand Down
2 changes: 1 addition & 1 deletion src/pudl/metadata/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,7 @@ def from_data_source_ids(
xbrl_resources=xbrl_resources,
)

def to_yaml(self) -> None:
def to_yaml(self) -> str:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy was complaining about this.

"""Output database, table, and column metadata to YAML file."""
template = _get_jinja_environment().get_template("datasette-metadata.yml.jinja")

Expand Down