Skip to content

Commit

Permalink
enhance: Refine the coding style and enable lint-action
Browse files Browse the repository at this point in the history
Signed-off-by: yangxuan <[email protected]>
  • Loading branch information
XuanYang-cn authored and alwayslove2013 committed Jan 9, 2025
1 parent cc30d03 commit 032515f
Show file tree
Hide file tree
Showing 103 changed files with 2,490 additions and 2,126 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
python -m pip install --upgrade pip
pip install -e ".[test]"
- name: Run coding checks
run: |
make lint
- name: Test with pytest
run: |
make unittest
49 changes: 0 additions & 49 deletions .ruff.toml

This file was deleted.

8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
unittest:
PYTHONPATH=`pwd` python3 -m pytest tests/test_dataset.py::TestDataSet::test_download_small -svv

format:
PYTHONPATH=`pwd` python3 -m black vectordb_bench
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench --fix

lint:
PYTHONPATH=`pwd` python3 -m black vectordb_bench --check
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the

### Check coding styles
```shell
$ ruff check vectordb_bench
$ make lint
```

Add `--fix` if you want to fix the coding styles automatically
To fix the coding styles automatically

```shell
$ ruff check vectordb_bench --fix
$ make format
```

## How does it work?
Expand Down
114 changes: 114 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dynamic = ["version"]

[project.optional-dependencies]
test = [
"black",
"ruff",
"pytest",
]
Expand Down Expand Up @@ -93,3 +94,116 @@ init_bench = "vectordb_bench.__main__:main"
vectordbbench = "vectordb_bench.cli.vectordbbench:cli"

[tool.setuptools_scm]

[tool.black]
line-length = 120
target-version = ['py311']
include = '\.pyi?$'

[tool.ruff]
lint.select = [
"E",
"F",
"C90",
"I",
"N",
"B", "C", "G",
"A",
"ANN001",
"S", "T", "W", "ARG", "BLE", "COM", "DJ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"
]
lint.ignore = [
"BLE001", # blind-except (BLE001)
"SLF001", # SLF001 Private member accessed [E]
"TRY003", # [ruff] TRY003 Avoid specifying long messages outside the exception class [E]
"FBT001", "FBT002", "FBT003",
"G004", # [ruff] G004 Logging statement uses f-string [E]
"UP031",
"RUF012",
"EM101",
"N805",
"ARG002",
"ARG003",
"PIE796", # https://github.com/zilliztech/VectorDBBench/issues/438
"INP001", # TODO
"TID252", # TODO
"N801", "N802", "N815",
"S101", "S108", "S603", "S311",
"PLR2004",
"RUF017",
"C416",
"PLW0603",
]

# Allow autofix for all enabled rules (when `--fix` is provided).
lint.fixable = [
"A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W",
"ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT",
"ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH",
"PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP",
"YTT",
]
lint.unfixable = []

show-fixes = true

# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
"grpc_gen",
"__pycache__",
"frontend", # TODO
"tests",
]

# Same as Black.
line-length = 120

# Allow unused variables when underscore-prefixed.
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.11
target-version = "py311"

[tool.ruff.lint.mccabe]
# Maximum allowed McCabe complexity; raised above Ruff's default of 10.
max-complexity = 18

[tool.ruff.lint.pycodestyle]
max-line-length = 120
max-doc-length = 120

[tool.ruff.lint.pylint]
max-args = 20
max-branches = 15

[tool.ruff.lint.flake8-builtins]
builtins-ignorelist = [
# "format",
# "next",
# "object", # TODO
# "id",
# "dict", # TODO
# "filter",
]

73 changes: 49 additions & 24 deletions vectordb_bench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,46 +22,71 @@ class config:
DROP_OLD = env.bool("DROP_OLD", True)
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)

NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
NUM_CONCURRENCY = env.list(
"NUM_CONCURRENCY",
[
1,
5,
10,
15,
20,
25,
30,
35,
40,
45,
50,
55,
60,
65,
70,
75,
80,
85,
90,
95,
100,
],
subcast=int,
)

CONCURRENCY_DURATION = 30

RESULTS_LOCAL_DIR = env.path(
"RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
"RESULTS_LOCAL_DIR",
pathlib.Path(__file__).parent.joinpath("results"),
)
CONFIG_LOCAL_DIR = env.path(
"CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
"CONFIG_LOCAL_DIR",
pathlib.Path(__file__).parent.joinpath("config-files"),
)


K_DEFAULT = 100 # default return top k nearest neighbors during search
CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")

CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d

LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d

OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d

OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d

OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d

def display(self) -> str:
tmp = [
i for i in inspect.getmembers(self)
if not inspect.ismethod(i[1])
and not i[0].startswith('_')
and "TIMEOUT" not in i[0]
return [
i
for i in inspect.getmembers(self)
if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
]
return tmp


log_util.init(config.LOG_LEVEL)
7 changes: 4 additions & 3 deletions vectordb_bench/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import traceback
import logging
import pathlib
import subprocess
import os
import traceback

from . import config

log = logging.getLogger("vectordb_bench")
Expand All @@ -16,7 +17,7 @@ def run_streamlit():
cmd = [
"streamlit",
"run",
f"{os.path.dirname(__file__)}/frontend/vdb_benchmark.py",
f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
"--logger.level",
"info",
"--theme.base",
Expand Down
25 changes: 12 additions & 13 deletions vectordb_bench/backend/assembler.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
from .cases import CaseLabel
from .task_runner import CaseRunner, RunningStatus, TaskRunner
from ..models import TaskConfig
from ..backend.clients import EmptyDBCaseConfig
from ..backend.data_source import DatasetSource
import logging

from vectordb_bench.backend.clients import EmptyDBCaseConfig
from vectordb_bench.backend.data_source import DatasetSource
from vectordb_bench.models import TaskConfig

from .cases import CaseLabel
from .task_runner import CaseRunner, RunningStatus, TaskRunner

log = logging.getLogger(__name__)


class Assembler:
@classmethod
def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
c_cls = task.case_config.case_id.case_cls

c = c_cls(task.case_config.custom_case)
if type(task.db_case_config) != EmptyDBCaseConfig:
if type(task.db_case_config) is not EmptyDBCaseConfig:
task.db_case_config.metric_type = c.dataset.data.metric_type

runner = CaseRunner(
return CaseRunner(
run_id=run_id,
config=task,
ca=c,
status=RunningStatus.PENDING,
dataset_source=source,
)

return runner

@classmethod
def assemble_all(
cls,
Expand All @@ -50,12 +49,12 @@ def assemble_all(
db2runner[db].append(r)

# check dbclient installed
for k in db2runner.keys():
for k in db2runner:
_ = k.init_cls

# sort by dataset size
for k in db2runner.keys():
db2runner[k].sort(key=lambda x:x.ca.dataset.data.size)
for k in db2runner:
db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)

all_runners = []
all_runners.extend(load_runners)
Expand Down
Loading

0 comments on commit 032515f

Please sign in to comment.