-
-
Notifications
You must be signed in to change notification settings - Fork 119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Log datasette access IPs #3669
Log datasette access IPs #3669
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
load_module modules/ngx_http_realip_module.so; | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
daemon off; | ||
|
||
events { | ||
worker_connections 1024; | ||
} | ||
http { | ||
server { | ||
listen 8080; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory we could turn this (and the Dockerfile) into a jinja template, which might be cleaner, but it felt like gilding the lily to me. |
||
location / { | ||
proxy_pass http://127.0.0.1:8081/; | ||
proxy_set_header Host $host; | ||
set_real_ip_from 0.0.0.0/0; | ||
real_ip_header X-Forwarded-For; | ||
real_ip_recursive on; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,4 +8,5 @@ ls | |
mv all_dbs.tar.zst /data | ||
zstd -f -d /data/all_dbs.tar.zst -o /data/all_dbs.tar | ||
tar -xf /data/all_dbs.tar --directory /data | ||
datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $PORT | ||
datasette serve --host 0.0.0.0 ${DATABASES} --cors --inspect-file inspect-data.json --metadata metadata.yml --setting sql_time_limit_ms 5000 --port $DATASETTE_PORT > /dev/null & | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We want to suppress the |
||
nginx -c nginx.conf |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,18 +35,29 @@ | |
|
||
DOCKERFILE_TEMPLATE = """ | ||
FROM python:3.11.0-slim-bullseye | ||
COPY . /app | ||
WORKDIR /app | ||
|
||
RUN apt-get update | ||
RUN apt-get install -y zstd | ||
|
||
ENV DATASETTE_SECRET '{datasette_secret}' | ||
ENV DATABASES '{databases}' | ||
ENV DATASETTE_PORT 8081 | ||
ENV NGINX_PORT 8080 | ||
|
||
RUN apt-get update | ||
RUN apt-get install -y zstd nginx | ||
RUN pip install -U datasette datasette-cluster-map datasette-vega datasette-block-robots | ||
ENV PORT 8080 | ||
EXPOSE 8080 | ||
|
||
# set up nginx + enable real IP module | ||
COPY nginx.conf /usr/share/nginx/nginx.conf | ||
COPY 50-mod-http-realip.conf /etc/nginx/modules-enabled/ | ||
|
||
# the two symlinks allow nginx logs to get written out to stdout/stderr | ||
RUN mkdir /data \ | ||
&& ln -sf /dev/stdout /var/log/nginx/access.log \ | ||
&& ln -sf /dev/stderr /var/log/nginx/error.log | ||
|
||
COPY . /app | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved this down to the bottom since all the other setup changes much less frequently than the contents of the Docker context - this helps with build caching. |
||
WORKDIR /app | ||
|
||
EXPOSE ${{NGINX_PORT}} | ||
ENV DATASETTE_SECRET '{datasette_secret}' | ||
CMD ["./run.sh"] | ||
""" | ||
|
||
|
@@ -106,7 +117,8 @@ def inspect_data(datasets: list[str], pudl_output: Path) -> str: | |
"-l", | ||
"deploy", | ||
flag_value="local", | ||
help="Deploy Datasette locally for testing or debugging purposes.", | ||
help="Deploy Datasette locally for testing or debugging purposes. Note that" | ||
"you have to stop the docker instance manually to terminate this server.", | ||
) | ||
@click.option( | ||
"--metadata", | ||
|
@@ -138,22 +150,48 @@ def deploy_datasette( | |
|
||
python publish.py --fly -- --build-only | ||
""" | ||
logging.info(f"Deploying to {deploy.upper()}...") | ||
pudl_output = PudlPaths().pudl_output | ||
|
||
pudl_output = PudlPaths().pudl_output | ||
metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml() | ||
|
||
all_databases = ( | ||
["pudl.sqlite"] | ||
+ sorted(str(p.name) for p in pudl_output.glob("ferc*.sqlite")) | ||
+ ["censusdp1tract.sqlite"] | ||
) | ||
databases = list(only_databases if only_databases else all_databases) | ||
|
||
# Make sure we have the expected metadata for databases | ||
# headed to deployment. | ||
if deploy != "metadata": | ||
check_tables_have_metadata(metadata_yml, databases) | ||
fly_dir = Path(__file__).parent.absolute() / "fly" | ||
docker_path = fly_dir / "Dockerfile" | ||
inspect_path = fly_dir / "inspect-data.json" | ||
metadata_path = fly_dir / "metadata.yml" | ||
metadata_yml = DatasetteMetadata.from_data_source_ids(pudl_output).to_yaml() | ||
|
||
logging.info(f"Writing Datasette metadata to: {metadata_path}") | ||
with metadata_path.open("w") as f: | ||
f.write(metadata_yml) | ||
check_tables_have_metadata(metadata_yml, databases) | ||
|
||
if deploy == "metadata": | ||
logging.info("Only writing metadata. Aborting now.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You maybe forgot to abort the script here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure did, fixing! Good catch :) Fortunately we only actually deploy if we actively choose a valid deployment target 😅 |
||
return 0 | ||
|
||
logging.info(f"Inspecting DBs for datasette: {databases}...") | ||
inspect_output = inspect_data(databases, pudl_output) | ||
with inspect_path.open("w") as f: | ||
f.write(json.dumps(inspect_output)) | ||
|
||
logging.info("Writing Dockerfile...") | ||
with docker_path.open("w") as f: | ||
f.write(make_dockerfile(databases)) | ||
|
||
logging.info(f"Compressing {databases} and putting into docker context...") | ||
check_call( # noqa: S603 | ||
["tar", "-a", "-czvf", fly_dir / "all_dbs.tar.zst"] + databases, | ||
cwd=pudl_output, | ||
) | ||
|
||
# OK, now we have a Dockerfile + the right context. Time to run the dang | ||
# container somehwere. | ||
if deploy in {"production", "staging"}: | ||
fly_dir = Path(__file__).parent.absolute() / "fly" | ||
logging.info(f"Deploying {deploy} to fly.io...") | ||
|
@@ -189,21 +227,13 @@ def deploy_datasette( | |
|
||
elif deploy == "local": | ||
logging.info("Running Datasette locally...") | ||
metadata_path = pudl_output / "metadata.yml" | ||
logging.info(f"Writing Datasette metadata to: {metadata_path}") | ||
with metadata_path.open("w") as f: | ||
f.write(metadata_yml) | ||
|
||
check_call( # noqa: S603 | ||
["/usr/bin/env", "datasette", "serve", "-m", "metadata.yml"] + databases, | ||
cwd=pudl_output, | ||
["/usr/bin/env", "docker", "build", "-t", "pudl_datasette:local", "."], | ||
cwd=fly_dir, | ||
) | ||
check_call( # noqa: S603 | ||
["/usr/bin/env", "docker", "run", "-p", "8080:8080", "pudl_datasette:local"] | ||
) | ||
|
||
elif deploy == "metadata": | ||
metadata_path = Path.cwd() / "metadata.yml" | ||
logging.info(f"Writing Datasette metadata to: {metadata_path}") | ||
with metadata_path.open("w") as f: | ||
f.write(metadata_yml) | ||
|
||
else: | ||
logging.error(f"Unrecognized deployment destination: {deploy=}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2218,7 +2218,7 @@ def from_data_source_ids( | |
xbrl_resources=xbrl_resources, | ||
) | ||
|
||
def to_yaml(self) -> None: | ||
def to_yaml(self) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mypy was complaining about this. |
||
"""Output database, table, and column metadata to YAML file.""" | ||
template = _get_jinja_environment().get_template("datasette-metadata.yml.jinja") | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could technically also live in
nginx.conf
but this is more idiomatic.