Skip to content

Commit

Permalink
Update to uv v0.5.25
Browse files Browse the repository at this point in the history
  • Loading branch information
edsu committed Feb 5, 2025
1 parent a0cfdb1 commit 4addd48
Show file tree
Hide file tree
Showing 9 changed files with 788 additions and 204 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,11 @@ jobs:
with:
args: 'format --check'

- name: Install dependencies
run: |
pip install -r requirements.txt
pip install -r requirements-dev.txt
- name: Install uv
run: pip3 install uv

- name: Run tests
run: pytest
run: uv run pytest
env:
AIRFLOW_VAR_DIMENSIONS_API_USER: ${{ secrets.AIRFLOW_VAR_DIMENSIONS_API_USER }}
AIRFLOW_VAR_DIMENSIONS_API_PASS: ${{ secrets.AIRFLOW_VAR_DIMENSIONS_API_PASS }}
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM apache/airflow:2.9.3-python3.12
FROM apache/airflow:2.10.4-python3.12

USER root
RUN apt-get update && apt-get install -y gcc git
Expand All @@ -8,6 +8,6 @@ ENV PYTHONPATH "${PYTHONPATH}:/opt/airflow/"
USER airflow

COPY rialto_airflow ./rialto_airflow
COPY requirements.txt ./
COPY pyproject.toml ./

RUN uv pip install --no-cache "apache-airflow==${AIRFLOW_VERSION}" -r requirements.txt
52 changes: 7 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,65 +67,27 @@ https://sul-rialto-dev.stanford.edu/authors?action=index&commit=Search&controlle
### Set-up

1. Install `uv` for dependency management as described in [the uv docs](https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started).
2. Create a virtual environment:
```
uv venv
```

This will create the virtual environment at the default location of `.venv/`. `uv` automatically looks for a venv at this location when installing dependencies.

3. Activate the virtual environment:
```
source .venv/bin/activate
```


### Install dependencies
```
uv pip install -r requirements.txt
```

To add a dependency:
1. `uv pip install flask`
2. Add the dependency to `pyproject.toml`.
3. To re-generate the locked dependencies in `requirements.txt`:
```
uv pip compile pyproject.toml -o requirements.txt
```

Unlike Poetry, uv's dependency resolution is not platform-agnostic. If we find we need to generate a `requirements.txt` for Linux, we can use [uv's multi-platform resolution options](https://github.com/astral-sh/uv?tab=readme-ov-file#multi-platform-resolution).
If you need to add a dependency:
1. `uv add flask`

### Upgrading dependencies
To upgrade Python dependencies:
```
uv pip compile pyproject.toml -o requirements.txt --upgrade
uv lock --upgrade
```

## Run Tests

First enable the virtual environment:

```
source .venv/bin/activate
```

Then ensure the app dependencies and dev dependencies are installed.

```
uv pip install -r requirements.txt -r requirements-dev.txt
```

Then run the tests:

```
pytest
uv run pytest
```

### Linting and formatting

1. Run linting: `ruff check`
2. Automatically fix linting: `ruff check --fix`
3. Run formatting: `ruff format` (or `ruff format --check` to identify any unformatted files)
1. Run linting: `uv run ruff check`
2. Automatically fix linting: `uv run ruff check --fix`
3. Run formatting: `uv run ruff format` (or `uv run ruff format --check` to identify any unformatted files)

## Deployment

Expand Down
14 changes: 11 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "0.1.0"
description = "Airflow app for harvesting data for open access analysis and research intelligence."
authors = [ {name = "Laura Wrubel", email = "[email protected]"}, {name = "Ed Summers", email = "[email protected]"}, {name = "Jacob Hill", email = "[email protected]"}]
requires-python = ">= 3.12"
package-mode = false
dependencies = [
"pandas",
"requests",
Expand All @@ -18,6 +19,13 @@ dependencies = [
pythonpath = ["."]
addopts = "-v"

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[dependency-groups]
dev = [
"pytest>=8.3.4",
"python-dotenv>=1.0.1",
"ruff>=0.9.4",
]

#[build-system]
#requires = ["setuptools"]
#build-backend = "setuptools.build_meta"
4 changes: 0 additions & 4 deletions requirements-dev.txt

This file was deleted.

125 changes: 0 additions & 125 deletions requirements.txt

This file was deleted.

3 changes: 2 additions & 1 deletion rialto_airflow/harvest/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def publications_from_dois(dois: list):
# TODO: do we need this to stay within 100,000 requests / day API quota?
time.sleep(1)

doi_list = quote("|".join([doi for doi in doi_batch]))
doi_list = "|".join([doi for doi in doi_batch])
try:
for page in Works().filter(doi=doi_list).paginate(per_page=200):
for pub in page:
Expand Down Expand Up @@ -124,6 +124,7 @@ def normalize_publication(pub) -> dict:

FIELDS = [
"abstract_inverted_index",
"abstract_inverted_index_v3",
"authorships",
"apc_list",
"apc_paid",
Expand Down
25 changes: 6 additions & 19 deletions test/harvest/test_openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,23 @@ def test_publications_from_dois():
# look up the publication metadata for them
pubs = list(openalex.publications_from_dois(dois))

# >= is used because sometimes there can be multiple works for a DOI!
assert len(pubs) >= 231, "should paginate (page size=200)"
# > 200 is used because some of the 231 DOIs have been removed from OpenAlex 🤷
assert len(pubs) > 200, "should paginate (page size=200)"
assert set(openalex.FIELDS) == set(pubs[0].keys()), "All fields accounted for."
assert len(pubs[0].keys()) == 53, "first publication has 53 columns"
assert len(pubs[1].keys()) == 53, "second publication has 53 columns"
assert len(pubs[0].keys()) == 54, "first publication has 54 columns"
assert len(pubs[1].keys()) == 54, "second publication has 54 columns"


def test_publications_from_invalid_dois(caplog):
# Error may change if OpenAlex API or pyalex changes
invalid_dois = ["doi-with-comma,a", "10.1145/3442188.3445922"]
assert len(list(openalex.publications_from_dois(invalid_dois))) == 1
assert (
"OpenAlex QueryError for doi-with-comma,a: Invalid query parameter"
in caplog.text
), "logs error message"


def test_publications_from_invalid_with_comma(caplog):
# OpenAlex interprets a DOI string containing a comma as two DOIs, but it
# does not return a result for the first half even if it is valid, so an empty list is returned.
invalid_doi = ["10.1002/cncr.33546,-(wileyonlinelibrary.com)"]
assert len(list(openalex.publications_from_dois(invalid_doi))) == 0
assert (
"OpenAlex QueryError for 10.1002/cncr.33546,-(wileyonlinelibrary.com): Invalid query parameter"
in caplog.text
), "logs error message"


def test_publications_csv(tmp_path):
Expand Down Expand Up @@ -102,12 +93,8 @@ def test_publications_csv(tmp_path):


def test_pyalex_urlencoding():
# this might start working if https://github.com/J535D165/pyalex/issues/41 is fixed
with pytest.raises(pyalex.api.QueryError):
pyalex.Works().filter(doi="10.1207/s15327809jls0703&4_2").count() == 1

assert (
pyalex.Works().filter(doi="10.1207/s15327809jls0703%264_2").count() == 1
pyalex.Works().filter(doi="10.1207/s15327809jls0703&4_2").count() == 1
), "url encoding the & works with OpenAlex API"

assert (
Expand All @@ -119,7 +106,7 @@ def test_pyalex_urlencoding():
)
)
== 2
), "we handle url URL encoding DOIs until pyalex does"
), "we handle URL encoding"


@pytest.mark.skip(reason="This record no longer exhibits the problem")
Expand Down
Loading

0 comments on commit 4addd48

Please sign in to comment.