Skip to content

Commit

Permalink
Merge pull request #80 from Photoroom/ben/rust_files
Browse files Browse the repository at this point in the history
[Rust] Add file serve support & refactor
  • Loading branch information
blefaudeux committed Mar 2, 2025
2 parents a279d98 + ef27aa7 commit 6efd135
Show file tree
Hide file tree
Showing 20 changed files with 606 additions and 413 deletions.
71 changes: 0 additions & 71 deletions .github/workflows/ci-cd.yml

This file was deleted.

109 changes: 26 additions & 83 deletions .github/workflows/rust-py.yml
Original file line number Diff line number Diff line change
@@ -1,80 +1,14 @@
# name: Rust-py

# on:
# push:
# branches: ["main"]
# pull_request:
# branches: ["main"]

# jobs:
# build:
# runs-on: ${{ matrix.os }}
# strategy:
# matrix:
# os: [ubuntu-latest]
# rust: [stable]

# steps:
# - uses: actions/checkout@v3

# - name: Install Rust
# uses: actions-rs/toolchain@v1
# with:
# profile: minimal
# toolchain: ${{ matrix.rust }}
# override: true
# components: rustfmt, clippy # , cargo-llvm-cov

# - name: Cache dependencies
# uses: actions/cache@v3
# with:
# path: |
# ~/.cargo/bin/
# ~/.cargo/registry/index/
# ~/.cargo/registry/cache/
# ~/.cargo/git/db/
# target/
# key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: "3.11.10"

# - name: Install maturin
# run: |
# python3 -m pip install maturin

# - name: Build and install python module
# run: |
# cd datago
# maturin build -i python3.11 --release --target "x86_64-unknown-linux-gnu"
# cd target/wheels
# python3 -m pip install --user -v *.whl

# - name: Run the python unit tests
# env:
# DATAROOM_API_KEY: ${{ secrets.DATAROOM_API_KEY }}
# DATAROOM_TEST_SOURCE: ${{ secrets.DATAROOM_TEST_SOURCE }}
# DATAROOM_API_URL: ${{ secrets.DATAROOM_API_URL }}

# run: |
# ls
# # python3 -m pip install -r requirements-tests.txt
# # pytest -xv python/*


name: CI
name: Rust-py

on:
push:
branches:
- main
tags:
- '*'
- "*"
pull_request:
branches:
- main
- "*"
workflow_dispatch:

permissions:
Expand All @@ -91,29 +25,38 @@ jobs:

steps:
- uses: actions/checkout@v4
- run: git fetch --prune --unshallow

- uses: actions/setup-python@v5
with:
python-version: 3.x

python-version: 3.11

- name: Install maturin
run: |
python3 -m pip install maturin twine
- name: Build and upload the package
# Gather the name of the latest tag on the current main branch
- name: Get the latest tag
id: get_tag
run: echo "tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT

- name: Build the package
run: |
maturin build -i python3.11 --release --out dist --target "x86_64-unknown-linux-gnu"
mv dist/datago-0.0.0-cp311-cp311-linux_x86_64.whl dist/datago-${{ steps.get_tag.outputs.tag }}-cp311-cp311-linux_x86_64.whl
- name: Test package
env:
DATAROOM_API_KEY: ${{ secrets.DATAROOM_API_KEY }}
DATAROOM_TEST_SOURCE: ${{ secrets.DATAROOM_TEST_SOURCE }}
DATAROOM_API_URL: ${{ secrets.DATAROOM_API_URL }}

# - name: Build wheels
# uses: PyO3/maturin-action@v1
# with:
# target: ${{ matrix.platform.target }}
# args: --release --out dist --find-interpreter
# sccache: 'true'
# # manylinux: auto
# docker-options: "--env CIBW_BEFORE_BUILD_LINUX='${{ env.CIBW_BEFORE_BUILD_LINUX }}'"
# env:
# CIBW_BEFORE_BUILD_LINUX: yum -y install openssl openssl-devel perl-IPC-Cmd
# TODO: replace this with the live (evolving) version number of the package
run: |
python3 -m pip install dist/datago-${{ steps.get_tag.outputs.tag }}-cp311-cp311-linux_x86_64.whl
python3 -m pip install -r requirements-tests.txt
cd python
python3 -m pytest -v .
- name: Upload wheels
uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -153,7 +96,7 @@ jobs:
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v1
with:
subject-path: 'wheels-*/*'
subject-path: "wheels-*/*"
- name: Publish to PyPI
if: ${{ startsWith(github.ref, 'refs/tags/') }}
uses: PyO3/maturin-action@v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
branches: ["*"]

env:
CARGO_TERM_COLOR: always
Expand Down
31 changes: 30 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[package]
name = "datago"
version = "0.1.0"
edition = "2021"

[lib]
Expand All @@ -27,6 +26,7 @@ pyo3 = { version = "0.23.4", features = ["extension-module"] }
threadpool = "1.8.1"
num_cpus = "1.16.0"
openssl = { version = "0.10", features = ["vendored"] }
walkdir = "2.5.0"

[profile.release]
opt-level = 3 # Optimize for speed
Expand Down
21 changes: 2 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,9 @@ import json
config = {
"source_config": {
"sources": os.environ.get("DATAROOM_TEST_SOURCE", ""),
"sources_ne": "",
"require_images": True,
"require_embeddings": True,
"tags": "",
"tags_ne": "",
"has_attributes": "",
"lacks_attributes": "",
"has_masks": "",
"lacks_masks": "",
"has_latents": "",
"lacks_latents": "",
"min_short_edge": 0,
"max_short_edge": 0,
"min_pixel_count": -1,
"max_pixel_count": -1,
"duplicate_state": -1,
"random_sampling": False,
"page_size": 10,
"page_size": 500,
},
"limit": 2,
"limit": 20,
"rank": 0,
"world_size": 1,
"samples_buffer_size": 1,
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[project]
name = "datago"
version = "2025.2.1"
authors = [
{ name="Photoroom", email="[email protected]" },
]
Expand Down
8 changes: 3 additions & 5 deletions python/benchmark_filesystem.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from datago import datago # type: ignore
import time
from tqdm import tqdm
import os
Expand All @@ -18,12 +17,9 @@ def benchmark(
):
print(f"Running benchmark for {root_path} - {limit} samples")
client_config = {
"source_type": datago.SourceTypeFileSystem,
"source_type": "file",
"source_config": {
"page_size": 512,
"root_path": root_path,
"rank": 0,
"world_size": 1,
},
"image_config": {
"crop_and_resize": crop_and_resize,
Expand All @@ -36,6 +32,8 @@ def benchmark(
"prefetch_buffer_size": 128,
"samples_buffer_size": 64,
"limit": limit,
"rank": 0,
"world_size": 1,
}

# Make sure in the following that we compare apples to apples, meaning in that case
Expand Down
Loading

0 comments on commit 6efd135

Please sign in to comment.