diff --git a/poetry.lock b/poetry.lock index d4216c6..5cdebb0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -527,6 +527,14 @@ python-versions = ">=3.8,<4.0" [package.dependencies] requests = ">=2.27.1,<3.0.0" +[[package]] +name = "types-python-slugify" +version = "5.0.3" +description = "Typing stubs for python-slugify" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-requests" version = "2.27.9" @@ -588,7 +596,7 @@ testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "89649c5414fd19344284104bc7e1e8472dec1afa5b66099b3689061458739c64" +content-hash = "bffd3cbbb8c8f3c00bb451ff548653615fe6ab3c1cfce1698cb59719201eb2ec" [metadata.files] atomicwrites = [ @@ -924,6 +932,10 @@ toucan-client = [ {file = "toucan_client-1.1.1-py3-none-any.whl", hash = "sha256:0d8047c7c7c627cf4640a5834d6d10cd1bdb5a6a7d8f8e75847ed91a78e0013f"}, {file = "toucan_client-1.1.1.tar.gz", hash = "sha256:db51600144557ba6ca52c5aec4360ccda4c11913ee5dbd81094e2b60fcf5d6ab"}, ] +types-python-slugify = [ + {file = "types-python-slugify-5.0.3.tar.gz", hash = "sha256:76169f4a6d40896fea76fb45a25c50ac7ba2ca03eee759ecac322a05fe4dd21c"}, + {file = "types_python_slugify-5.0.3-py3-none-any.whl", hash = "sha256:a5761d3c55e949f8ace0694eb5be81210087084bb78df495de14ee5eaad6ac54"}, +] types-requests = [ {file = "types-requests-2.27.9.tar.gz", hash = "sha256:7368974534d297939492efdfdab232930440b11e2203f6df1f0c40e3242a87ea"}, {file = "types_requests-2.27.9-py3-none-any.whl", hash = "sha256:74070045418faf710f3154403d6a16c9e67db50e5119906ca6955f1658d20f7b"}, diff --git a/pyproject.toml b/pyproject.toml index 782fab7..566bb00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ pytest = "^7.0.0" pytest-cov = "^3.0.0" pytest-mock = "^3.7.0" # types +types-python-slugify = "^5.0.3" types-requests = "^2.27.9" [build-system] @@ -75,6 +76,45 @@ warn_unreachable = true show_error_codes = true show_column_numbers = true show_error_context = true +exclude = [ + "decorators\\.py$", + # generic functions + "generic/add_missing_row\\.py$", + "generic/clean\\.py$", + "generic/combine_columns_aggregation\\.py$", + "generic/compute_cumsum\\.py$", + "generic/compute_evolution\\.py$", + "generic/compute_ffill_by_group\\.py$", + "generic/date_requester\\.py$", + "generic/roll_up\\.py$", + "generic/two_values_melt\\.py$", + # postprocess functions + "postprocess/add_aggregation_columns\\.py$", + "postprocess/argmax\\.py$", + "postprocess/categories_from_dates\\.py$", + "postprocess/converter\\.py$", + "postprocess/cumsum\\.py$", + "postprocess/fillna\\.py$", + "postprocess/filter_by_date\\.py$", + "postprocess/filter\\.py$", + "postprocess/groupby\\.py$", + "postprocess/if_else\\.py$", + "postprocess/json_to_table\\.py$", + "postprocess/linear_regression\\.py$", + "postprocess/math\\.py$", + "postprocess/melt\\.py$", + "postprocess/percentage\\.py$", + "postprocess/pivot\\.py$", + "postprocess/rank\\.py$", + "postprocess/rename\\.py$", + "postprocess/replace\\.py$", + "postprocess/sort\\.py$", + "postprocess/text\\.py$", + "postprocess/top\\.py$", + "postprocess/waterfall\\.py$", + # all tests + "^tests" +] [tool.coverage.report] exclude_lines = [ diff --git a/toucan_data_sdk/utils/decorators.py b/toucan_data_sdk/utils/decorators.py index d81209e..d7eb164 100644 --- a/toucan_data_sdk/utils/decorators.py +++ b/toucan_data_sdk/utils/decorators.py @@ -42,6 +42,7 @@ def parse_reseau(df): from functools import partial, wraps from hashlib import md5 from threading import current_thread +from typing import Optional import joblib import pandas as pd @@ -57,7 +58,7 @@ def parse_reseau(df): _logger = logging.getLogger(__name__) -def catch(logger): +def catch(logger: logging.Logger) -> Callable: """ Decorator to catch an exception and don't raise it. Logs information if a decorator failed. @@ -337,7 +338,7 @@ def f(**tmp_extra_kwargs): result = f(**tmp_extra_kwargs) if limit is not None: - clean_cachedir_old_entries(f.store_backend, func.__name__, limit) + clean_cachedir_old_entries(f.store_backend, func.__name__, limit) # type: ignore[attr-defined] return result @@ -349,14 +350,16 @@ def f(**tmp_extra_kwargs): method_cache = partial(cache, applied_on_method=True) -def setup_cachedir(cachedir, mmap_mode=None, bytes_limit=None): +def setup_cachedir( + cachedir: str, mmap_mode: Optional[str] = None, bytes_limit: Optional[int] = None +) -> joblib.Memory: """This function injects a joblib.Memory object in the cache() function (in a thread-specific slot of its 'memories' attribute).""" if not hasattr(cache, "memories"): - cache.memories = {} + cache.memories = {} # type: ignore[attr-defined] memory = joblib.Memory( location=cachedir, verbose=0, mmap_mode=mmap_mode, bytes_limit=bytes_limit ) - cache.memories[current_thread().name] = memory + cache.memories[current_thread().name] = memory # type: ignore[attr-defined] return memory diff --git a/toucan_data_sdk/utils/helpers.py b/toucan_data_sdk/utils/helpers.py index 78643ca..e67bcb0 100644 --- a/toucan_data_sdk/utils/helpers.py +++ b/toucan_data_sdk/utils/helpers.py @@ -7,17 +7,30 @@ import threading from contextlib import contextmanager from pathlib import Path -from typing import List, Optional - -from joblib._store_backends import StoreBackendBase +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generator, + List, + Optional, + Sequence, + Tuple, +) + +from joblib._store_backends import CacheItemInfo, StoreBackendBase from slugify import slugify as _slugify +if TYPE_CHECKING: + import pandas as pd + logger = logging.getLogger(__name__) LOCALE_LOCK = threading.Lock() CURRENT_LOCALE = locale.getlocale() -def get_temp_column_name(df) -> str: +def get_temp_column_name(df: "pd.DataFrame") -> str: """Small helper to get a new column name that does not already exist""" temp_column_name = "__tmp__" while temp_column_name in df.columns: @@ -26,7 +39,7 @@ def get_temp_column_name(df) -> str: @contextmanager -def setlocale(name: Optional[str]): +def setlocale(name: Optional[str]) -> Generator[str, None, None]: """ Context manager to set a locale ('en', 'fr', 'de', ...) """ @@ -77,7 +90,7 @@ def get_param_value_from_func_call(param_name, func, call_args, call_kwargs): return call.arguments[param_name] -def get_func_sourcecode(func): +def get_func_sourcecode(func: Callable[..., Any]) -> str: """ Try to get sourcecode using standard inspect.getsource(). If the function comes from a module which has been created dynamically @@ -87,15 +100,15 @@ def get_func_sourcecode(func): the original module code. """ - def getsource(func): + def getsource(func: Callable[..., Any]) -> str: lines, lnum = getsourcelines(func) return "".join(lines) - def getsourcelines(func): + def getsourcelines(func: Callable[..., Any]) -> Tuple[Sequence[str], int]: lines, lnum = findsource(func) return inspect.getblock(lines[lnum:]), lnum + 1 - def findsource(func): + def findsource(func: Callable[..., Any]) -> Tuple[List[str], int]: file = getfile(func) # file path module = inspect.getmodule(func, file) lines = linecache.getlines(file, module.__dict__) @@ -108,9 +121,9 @@ def findsource(func): lnum = lnum - 1 # pragma: no cover return lines, lnum - def getfile(func): + def getfile(func: Callable[..., Any]) -> str: module = inspect.getmodule(func) - return module.__file__ + return module.__file__ # type: ignore try: return inspect.getsource(func) @@ -129,14 +142,14 @@ def check_params_columns_duplicate(cols_name: List[str]) -> bool: def slugify(name: str, separator: str = "-") -> str: """Returns a slugified name (we allow _ to be used)""" - return _slugify(name, regex_pattern=re.compile("[^-_a-z0-9]+"), separator=separator) + return _slugify(name, regex_pattern="[^-_a-z0-9]+", separator=separator) -def resolve_dependencies(func_name, dependencies): +def resolve_dependencies(func_name: str, dependencies: Dict[str, List[str]]) -> List[str]: """Given a function name and a mapping of function dependencies, returns a list of *all* the dependencies for this function.""" - def _resolve_deps(func_name, func_deps): + def _resolve_deps(func_name: str, func_deps: List[str]) -> None: """Append dependencies recursively to func_deps (accumulator)""" if func_name in func_deps: return @@ -145,7 +158,7 @@ def _resolve_deps(func_name, func_deps): for dep in dependencies.get(func_name, []): _resolve_deps(dep, func_deps) - func_deps = [] + func_deps: List[str] = [] _resolve_deps(func_name, func_deps) return sorted(func_deps) @@ -156,7 +169,7 @@ def clean_cachedir_old_entries(cachedir: StoreBackendBase, func_name: str, limit raise ValueError("'limit' must be greater or equal to 1") cache_entries = get_cachedir_entries(cachedir, func_name) - cache_entries = sorted(cache_entries, key=lambda e: e.last_access, reverse=True) + cache_entries = sorted(cache_entries, key=lambda e: e.last_access, reverse=True) # type: ignore[no-any-return] cache_entries_to_remove = cache_entries[limit:] for entry in cache_entries_to_remove: shutil.rmtree(entry.path, ignore_errors=True) @@ -164,7 +177,7 @@ def clean_cachedir_old_entries(cachedir: StoreBackendBase, func_name: str, limit return len(cache_entries_to_remove) -def get_cachedir_entries(cachedir: StoreBackendBase, func_name: str) -> list: +def get_cachedir_entries(cachedir: StoreBackendBase, func_name: str) -> List[CacheItemInfo]: entries = cachedir.get_items() return [e for e in entries if Path(e.path).parent.name == func_name] diff --git a/toucan_data_sdk/utils/traceback.py b/toucan_data_sdk/utils/traceback.py index 4afbb81..d5e87e3 100644 --- a/toucan_data_sdk/utils/traceback.py +++ b/toucan_data_sdk/utils/traceback.py @@ -1,13 +1,13 @@ import io import os import types -from typing import Any, Dict +from typing import Any, Dict, Tuple import joblib -def _from_pickled_dict(d): - def from_picklable(obj): +def _from_pickled_dict(d: Dict[str, Any]) -> Any: + def from_picklable(obj: Any) -> Any: if obj is None: return None try: @@ -23,7 +23,7 @@ def from_picklable(obj): return unpickled -def _from_serializable_traceback(d): +def _from_serializable_traceback(d: Dict[str, Any]) -> Tuple[Any, Any, Any]: tb = d["tb"] tb = types.SimpleNamespace(**tb) tb.tb_frame = types.SimpleNamespace(**tb.tb_frame) @@ -33,7 +33,7 @@ def _from_serializable_traceback(d): return d["exc_type"], d["exc_value"], tb -def _print_tb(exc_value, tb): +def _print_tb(exc_value: Any, tb: Any) -> None: lineno = tb.tb_lineno lines = tb.sourcecode.splitlines() @@ -43,10 +43,10 @@ def _print_tb(exc_value, tb): lines = lines[first_lineno:last_lineno] lines_numbers = list(range(first_lineno + 1, last_lineno + 1)) - lines_numbers = [str(n).rjust(4) + "|" for n in lines_numbers] - lines_numbers[relative_err_lineno] = "====>" + lines_numbers_str = [str(n).rjust(4) + "|" for n in lines_numbers] + lines_numbers_str[relative_err_lineno] = "====>" - for lineno, line in zip(lines_numbers, lines): + for lineno, line in zip(lines_numbers_str, lines): print(lineno + line) print("−−−−−−−−−−−−") print(exc_value)