From c5dde44fc750cd4db4a4e3001fae16ec95cc7106 Mon Sep 17 00:00:00 2001 From: Gabriel Gimenez Date: Thu, 4 Jul 2024 17:39:08 -0300 Subject: [PATCH] fix tests (#73) * update dependencies and fix errors --- .github/workflows/auto-publish.yml | 44 +++++++++++++++---------- .github/workflows/testing.yml | 30 +++++++++-------- .pre-commit-config.yaml | 12 +++---- Makefile | 10 +++--- README.md | 2 +- examples/benchmarks/benchmark.py | 7 ---- mkdocs.yml | 5 ++- pyproject.toml | 36 +++++++++++++++++++++ setup.py | 52 ------------------------------ tests/test_feature_extractors.py | 15 +++++---- tests/test_metrics.py | 2 +- xgbse/__init__.py | 5 ++- xgbse/_debiased_bce.py | 2 +- xgbse/_feature_extractors.py | 21 ++++++++---- xgbse/_kaplan_neighbors.py | 4 +-- xgbse/_meta.py | 1 - 16 files changed, 121 insertions(+), 127 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/.github/workflows/auto-publish.yml b/.github/workflows/auto-publish.yml index 1d6dda6..3e86f7d 100644 --- a/.github/workflows/auto-publish.yml +++ b/.github/workflows/auto-publish.yml @@ -6,20 +6,30 @@ jobs: release: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install Tools - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Package and Upload - env: - STACKMANAGER_VERSION: ${{ github.event.release.tag_name }} - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_APIKEY }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* + - uses: actions/checkout@v2 + + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Configure Poetry + run: | + poetry config pypi-token.pypi ${{ secrets.PYPI_APIKEY }} + + - name: Install 
dependencies + run: | + poetry install + + - name: Build package + run: | + poetry build + + - name: Publish package + run: | + poetry publish diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index ee1bbc1..8bf7f87 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -3,25 +3,29 @@ name: Code Checks on: push: branches: - - main + - main pull_request: branches: - - main + - main jobs: build: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7] - + python-version: ["3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install Testing Dependencies - run: make install-dev - - name: Automated checks - run: make check + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + version: 1.5.1 # You can specify the Poetry version you want to use + virtualenvs-create: true + virtualenvs-in-project: true + - name: Install dependencies + run: poetry install --no-interaction --no-root + - name: Automated checks + run: poetry run make check diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2676077..5ee5702 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,12 +9,8 @@ repos: - id: check-yaml - id: check-added-large-files args: ["--maxkb=2000"] - - repo: https://gitlab.com/pycqa/flake8 - rev: 8f9b4931b9a28896fb43edccb23016a7540f5b82 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.0 hooks: - - id: flake8 - - repo: https://github.com/psf/black - rev: 20.8b1 - hooks: - - id: black - language_version: python3.7 + - id: ruff + - id: ruff-format diff --git a/Makefile b/Makefile index ec83024..6ad2330 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,13 @@ -black: - black xgbse setup.py 
tests/ --check +format: + ruff format xgbse tests/ --check -flake: - flake8 xgbse setup.py tests/ +lint: + ruff check xgbse tests/ test: pytest --cov-report term-missing --cov=xgbse tests/ -check: black flake test clean +check: format lint test clean install: python -m pip install -e . diff --git a/README.md b/README.md index 04c1cd8..c2cab96 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ To cite this repository: author = {Davi Vieira and Gabriel Gimenez and Guilherme Marmerola and Vitor Estima}, title = {XGBoost Survival Embeddings: improving statistical properties of XGBoost survival analysis implementation}, url = {http://github.com/loft-br/xgboost-survival-embeddings}, - version = {0.2.3}, + version = {0.3.1}, year = {2021}, } ``` diff --git a/examples/benchmarks/benchmark.py b/examples/benchmarks/benchmark.py index 5b36510..29340d2 100644 --- a/examples/benchmarks/benchmark.py +++ b/examples/benchmarks/benchmark.py @@ -11,7 +11,6 @@ def dataframe_to_xy(dataf, event_column, time_column): - e = dataf.loc[:, event_column] t = dataf.loc[:, time_column] return dataf.drop([event_column, time_column], axis=1), convert_to_structured(t, e) @@ -65,7 +64,6 @@ def predict(self): pass def test(self): - self.predict() try: c_index = concordance_index( @@ -172,7 +170,6 @@ def __init__( ) def train(self): - start = time.time() params = {"objective": self.objective} @@ -226,14 +223,12 @@ def __init__( self.objective = objective def train(self): - start = time.time() if self.model.__class__.__name__ not in [ "XGBSEKaplanTree", "XGBSEBootstrapEstimator", ]: - self.model.fit( self.X_train, self.y_train, @@ -244,7 +239,6 @@ def train(self): ) else: - self.model.fit(self.X_train, self.y_train, time_bins=self.time_bins) self.training_time = time.time() - start @@ -284,7 +278,6 @@ def __init__( ) def train(self): - T = self.train_dataset[self.time_column] E = self.train_dataset[self.event_column] diff --git a/mkdocs.yml b/mkdocs.yml index efb46b1..4c0a2db 100644 --- 
a/mkdocs.yml +++ b/mkdocs.yml @@ -19,13 +19,12 @@ nav: - converters: modules/converters.md - metrics: modules/metrics.md - Examples: + - Basic usage: examples/basic_usage.md - Confidence intervals: examples/confidence_interval.md - Extrapolation: examples/extrapolation_example.md - Benchmarks: benchmarks/benchmarks.md plugins: - - mkdocstrings: - watch: - - xgbse + - mkdocstrings - search copyright: theme: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4982678 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,36 @@ +[tool.poetry] +name = "xgbse" +version = "0.3.1" +description = "Improving XGBoost survival analysis with embeddings and debiased estimators" +authors = ["Loft Data Science Team <bandits@loft.com.br>"] +readme = "README.md" +packages = [{ include = "xgbse" }] +repository = "https://github.com/loft-br/xgboost-survival-embeddings" + +[tool.poetry.dependencies] +python = ">=3.9" +xgboost = "^2.1.0" +numpy = "^1.26.4" +scikit-learn = "^1.5.0" +pandas = "^2.2.0" +joblib = "^1.4.2" +lifelines = "^0.29.0" + +[tool.poetry.group.docs.dependencies] +mkdocs = "^1.6.0" +mkdocs-material = "^9.5.28" +mkdocstrings = { version = ">=0.18", extras = ["python-legacy"] } + + +[tool.poetry.group.dev.dependencies] +pre-commit = "^3.7.1" +pytest = "^8.2.2" +pytest-cov = "^5.0.0" +ruff = "^0.5.0" + +[tool.poetry.group.benchmark.dependencies] +pycox = "0.2.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/setup.py b/setup.py deleted file mode 100644 index 7b7236d..0000000 --- a/setup.py +++ /dev/null @@ -1,52 +0,0 @@ -import setuptools - -install_requires = [ - "xgboost>=1.4.0", - "numpy>=1.18.4", - "scikit-learn>=0.22.2", - "pandas>=1.0.*", - "joblib>=0.15.1", - "lifelines>=0.25.4", -] - -docs_packages = [ - "mkdocs>=1.1", - "mkdocs-material>=4.6.3", - "mkdocstrings>=0.8.0", -] - -dev_packages = [ - "black>=19.10b0", - "flake8>=3.7.9", - "pre-commit>=2.7.1", - "pytest>=6.1.0", - "pytest-cov==2.10.1", -] + 
docs_packages - -benchmark_packages = [ - "pycox==0.2.1", -] - -all_packages = install_requires + dev_packages + benchmark_packages - -with open("docs/index.md", "r", encoding="utf-8") as fh: - long_description = fh.read() - -setuptools.setup( - name="xgbse", - version="0.2.3", - author="Loft Data Science Team", - author_email="bandits@loft.com.br", - description="Improving XGBoost survival analysis with embeddings and debiased estimators", - long_description=long_description, - long_description_content_type="text/markdown", - packages=setuptools.find_packages(), - install_requires=install_requires, - extras_require={ - "docs": docs_packages, - "dev": dev_packages, - "all": all_packages, - }, - python_requires=">=3.7", - url="https://github.com/loft-br/xgboost-survival-embeddings", -) diff --git a/tests/test_feature_extractors.py b/tests/test_feature_extractors.py index a14fdd9..1e56733 100644 --- a/tests/test_feature_extractors.py +++ b/tests/test_feature_extractors.py @@ -31,18 +31,21 @@ def test_no_objective(): def test_predict_leaves_early_stop(): xgbse = FeatureExtractor() + early_stopping_rounds = 10 xgbse.fit( X_train, y_train, - num_boost_round=10000, + num_boost_round=1000, validation_data=(X_valid, y_valid), - early_stopping_rounds=10, + early_stopping_rounds=early_stopping_rounds, verbose_eval=0, ) prediction = xgbse.predict_leaves(X_test) - assert prediction.shape == ( - X_test.shape[0], - xgbse.bst.best_iteration + 1, + assert prediction.shape[0] == X_test.shape[0] + assert ( + xgbse.bst.best_iteration + <= prediction.shape[1] + <= xgbse.bst.best_iteration + 1 + early_stopping_rounds ) @@ -64,7 +67,7 @@ def test_predict_hazard_early_stop(): xgbse.fit( X_train, y_train, - num_boost_round=10000, + num_boost_round=1000, validation_data=(X_valid, y_valid), early_stopping_rounds=10, verbose_eval=0, diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ccf685b..3440814 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -68,7 +68,7 
@@ def is_dist_cal_return_correct_len(): def is_dist_cal_return_correct_type(): result = dist_calibration_score(y_train, km_survival, returns="all") - return type(result) == dict + return isinstance(result, dict) # testing diff --git a/xgbse/__init__.py b/xgbse/__init__.py index f085352..67f6571 100644 --- a/xgbse/__init__.py +++ b/xgbse/__init__.py @@ -2,11 +2,10 @@ from ._debiased_bce import XGBSEDebiasedBCE from ._kaplan_neighbors import XGBSEKaplanNeighbors, XGBSEKaplanTree -from ._stacked_weibull import XGBSEStackedWeibull from ._meta import XGBSEBootstrapEstimator +from ._stacked_weibull import XGBSEStackedWeibull - -__version__ = "0.2.3" +__version__ = "0.3.1" __all__ = [ "XGBSEDebiasedBCE", diff --git a/xgbse/_debiased_bce.py b/xgbse/_debiased_bce.py index 6f1be31..83e71d9 100644 --- a/xgbse/_debiased_bce.py +++ b/xgbse/_debiased_bce.py @@ -292,7 +292,7 @@ def predict(self, X: pd.DataFrame, return_interval_probs: bool = False): XGBoost model. return_interval_probs (Bool): Boolean indicating if interval probabilities - are to be returned. If False the cumulative survival is returned. + are to be returned. If False the cumulative survival is returned. Default is False. 
Returns: diff --git a/xgbse/_feature_extractors.py b/xgbse/_feature_extractors.py index 83cbe80..0263b74 100644 --- a/xgbse/_feature_extractors.py +++ b/xgbse/_feature_extractors.py @@ -22,7 +22,7 @@ def __init__( """ if not xgb_params: xgb_params = {} - xgb_params = check_xgboost_parameters(xgb_params) + xgb_params = check_xgboost_parameters(xgb_params, enable_categorical) self.xgb_params = xgb_params self.persist_train = False @@ -119,21 +119,20 @@ def predict_leaves(self, X): raise ValueError("XGBoost model not fitted yet.") dmatrix = xgb.DMatrix(X, enable_categorical=self.enable_categorical) - return self.bst.predict( - dmatrix, pred_leaf=True, iteration_range=(0, self.bst.best_iteration + 1) - ) + return self.bst.predict(dmatrix, pred_leaf=True) def predict_hazard(self, X): if not hasattr(self, "bst"): raise ValueError("XGBoost model not fitted yet.") return self.bst.predict( - xgb.DMatrix(X, enable_categorical=self.enable_categorical), - iteration_range=(0, self.bst.best_iteration + 1), + xgb.DMatrix(X, enable_categorical=self.enable_categorical) ) -def check_xgboost_parameters(xgb_params: Dict[str, Any]) -> Dict[str, Any]: +def check_xgboost_parameters( + xgb_params: Dict[str, Any], enable_categorical: bool +) -> Dict[str, Any]: """Check if XGBoost objective parameter is valid. Args: @@ -145,6 +144,14 @@ def check_xgboost_parameters(xgb_params: Dict[str, Any]) -> Dict[str, Any]: Raises: ValueError: If XGBoost parameters are not valid for survival analysis. 
""" + if enable_categorical: + if "tree_method" not in xgb_params: + xgb_params["tree_method"] = "hist" + if xgb_params["tree_method"] not in ("hist", "gpu_hist"): + raise ValueError( + "XGBoost tree_method must be either 'hist' or 'gpu_hist' for categorical features" + ) + if "objective" not in xgb_params: xgb_params["objective"] = "survival:aft" if xgb_params["objective"] not in ("survival:aft", "survival:cox"): diff --git a/xgbse/_kaplan_neighbors.py b/xgbse/_kaplan_neighbors.py index a725d62..1cc17d7 100644 --- a/xgbse/_kaplan_neighbors.py +++ b/xgbse/_kaplan_neighbors.py @@ -19,7 +19,7 @@ DEFAULT_PARAMS_TREE = { "objective": "survival:cox", "eval_metric": "cox-nloglik", - "tree_method": "exact", + "tree_method": "hist", "max_depth": 100, "booster": "dart", "subsample": 1.0, @@ -275,7 +275,7 @@ def __init__( DEFAULT_PARAMS_TREE = { "objective": "survival:cox", "eval_metric": "cox-nloglik", - "tree_method": "exact", + "tree_method": "hist", "max_depth": 100, "booster": "dart", "subsample": 1.0, diff --git a/xgbse/_meta.py b/xgbse/_meta.py index 438f69a..b679b94 100644 --- a/xgbse/_meta.py +++ b/xgbse/_meta.py @@ -5,7 +5,6 @@ class XGBSEBootstrapEstimator(BaseEstimator): - """ Bootstrap meta-estimator for XGBSE models: