diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 665e9a151..fbe0d34fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,13 +17,8 @@ repos: hooks: - id: isort name: isort (python) - - id: mypy - name: mypy - description: "" - entry: mypy - language: python - "types_or": [python, pyi] - require_serial: true - pass_filenames: false - additional_dependencies: [] - minimum_pre_commit_version: "2.9.2" + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.14.1 + hooks: + - id: mypy + args: [--follow-imports=skip] diff --git a/logprep/abc/input.py b/logprep/abc/input.py index 585a5f4de..95b3949e6 100644 --- a/logprep/abc/input.py +++ b/logprep/abc/input.py @@ -8,7 +8,7 @@ import zlib from abc import abstractmethod from copy import deepcopy -from functools import partial, cached_property +from functools import cached_property, partial from hmac import HMAC from typing import Optional, Tuple from zoneinfo import ZoneInfo @@ -91,9 +91,9 @@ class TimeDeltaConfig: """TimeDelta Configurations Works only if the preprocessor log_arrival_time_target_field is set.""" - target_field: field(validator=[validators.instance_of(str), lambda _, __, x: bool(x)]) + target_field: field(validator=(validators.instance_of(str), lambda _, __, x: bool(x))) """Defines the fieldname to which the time difference should be written to.""" - reference_field: field(validator=[validators.instance_of(str), lambda _, __, x: bool(x)]) + reference_field: field(validator=(validators.instance_of(str), lambda _, __, x: bool(x))) """Defines a field with a timestamp that should be used for the time difference. The calculation will be the arrival time minus the time of this reference field.""" diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index 21ec1991c..551c2e834 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import TYPE_CHECKING, List, Optional -from attr import define, field, validators +from attrs import define, field, validators from logprep.abc.component import Component from logprep.framework.rule_tree.rule_tree import RuleTree @@ -96,12 +96,12 @@ class Config(Component.Config): As last option it is possible to define entire rules with all their configuration parameters as list elements. """ tree_config: Optional[str] = field( - default=None, validator=[validators.optional(validators.instance_of(str))] + default=None, validator=(validators.optional(validators.instance_of(str))) ) """Path to a JSON file with a valid :ref:`Rule Tree Configuration`. For string format see :ref:`getters`.""" apply_multiple_times: Optional[bool] = field( - default=False, validator=[validators.optional(validators.instance_of(bool))] + default=False, validator=(validators.optional(validators.instance_of(bool))) ) """Set if the processor should be applied multiple times. This enables further processing of an output with the same processor.""" diff --git a/logprep/connector/file/input.py b/logprep/connector/file/input.py index 1d6e439de..05d5e8285 100644 --- a/logprep/connector/file/input.py +++ b/logprep/connector/file/input.py @@ -212,7 +212,7 @@ class Config(Input.Config): format. Needs to be parsed with dissector or another processor""" start: str = field( - validator=[validators.instance_of(str), validators.in_(("begin", "end"))], + validator=(validators.instance_of(str), validators.in_(("begin", "end"))), default="begin", ) """Defines the behaviour of the file monitor with the following options: diff --git a/logprep/connector/opensearch/output.py b/logprep/connector/opensearch/output.py index 0f4c67567..384696c55 100644 --- a/logprep/connector/opensearch/output.py +++ b/logprep/connector/opensearch/output.py @@ -109,23 +109,23 @@ class Config(Output.Config): """(Optional) Timeout after :code:`message_backlog` is flushed if :code:`message_backlog_size` is not reached.""" thread_count: int = field( - default=4, validator=[validators.instance_of(int), validators.gt(1)] + default=4, validator=(validators.instance_of(int), validators.gt(1)) ) """Number of threads to use for bulk requests.""" queue_size: int = field( - default=4, validator=[validators.instance_of(int), validators.gt(1)] + default=4, validator=(validators.instance_of(int), validators.gt(1)) ) """Number of queue size to use for bulk requests.""" chunk_size: int = field( - default=500, validator=[validators.instance_of(int), validators.gt(1)] + default=500, validator=(validators.instance_of(int), validators.gt(1)) ) """Chunk size to use for bulk requests.""" max_chunk_bytes: int = field( - default=100 * 1024 * 1024, validator=[validators.instance_of(int), validators.gt(1)] + default=100 * 1024 * 1024, validator=(validators.instance_of(int), validators.gt(1)) ) """Max chunk size to use for bulk requests. The default is 100MB.""" max_retries: int = field( - default=3, validator=[validators.instance_of(int), validators.gt(0)] + default=3, validator=(validators.instance_of(int), validators.gt(0)) ) """Max retries for all requests. Default is 3.""" desired_cluster_status: list = field( @@ -134,7 +134,7 @@ class Config(Output.Config): """Desired cluster status for health check as list of strings. Default is ["green"]""" default_op_type: str = field( default="index", - validator=[validators.instance_of(str), validators.in_(["create", "index"])], + validator=(validators.instance_of(str), validators.in_(["create", "index"])), ) """Default op_type for indexing documents. Default is 'index', Consider using 'create' for data streams or to prevent overwriting existing documents.""" diff --git a/logprep/generator/http/input.py b/logprep/generator/http/input.py index 6dce88ce2..72f1f0367 100644 --- a/logprep/generator/http/input.py +++ b/logprep/generator/http/input.py @@ -14,21 +14,23 @@ from typing import Generator, List import msgspec -import yaml -from attr import define, field, validators +from attrs import define, field, validators +from ruamel.yaml import YAML from logprep.generator.http.manipulator import Manipulator +yaml = YAML(typ="safe") + @define(kw_only=True) class TimestampReplacementConfig: """Configuration Class fot TimestampReplacement""" - key: str = field(validator=[validators.instance_of(str)]) + key: str = field(validator=(validators.instance_of(str))) format: str = field(validator=validators.instance_of(str)) time_shift: str = field( default="+0000", - validator=[validators.instance_of(str), validators.matches_re(r"[+-]\d{4}")], + validator=(validators.instance_of(str), validators.matches_re(r"[+-]\d{4}")), ) time_delta: timedelta = field( default=None, validator=validators.optional(validators.instance_of(timedelta)) @@ -148,7 +150,7 @@ def _load_event_class_config(self, event_class_dir_path: str) -> EventClassConfi """Load the event class specific configuration""" config_path = os.path.join(event_class_dir_path, "config.yaml") with open(config_path, "r", encoding="utf8") as file: - event_class_config = yaml.safe_load(file) + event_class_config = yaml.load(file) self.log.debug("Following class config was loaded: %s", event_class_config) event_class_config = EventClassConfig(**event_class_config) if "," in event_class_config.target_path: diff --git a/logprep/processor/generic_resolver/rule.py b/logprep/processor/generic_resolver/rule.py index e9f9c9efe..7db73ca84 100644 --- a/logprep/processor/generic_resolver/rule.py +++ b/logprep/processor/generic_resolver/rule.py @@ -117,7 +117,7 @@ class Config(FieldManagerRule.Config): ] ) """Mapping in form of :code:`{SOURCE_FIELD: DESTINATION_FIELD}`""" - resolve_list: dict = field(validator=[validators.instance_of(dict)], factory=dict) + resolve_list: dict = field(validator=(validators.instance_of(dict)), factory=dict) """lookup mapping in form of :code:`{REGEX_PATTERN_0: ADDED_VALUE_0, ..., REGEX_PATTERN_N: ADDED_VALUE_N}`""" resolve_from_file: dict = field( diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index 7dd596120..ab75930bc 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -26,7 +26,7 @@ from typing import Optional -from attr import define, field, validators +from attrs import define, field, validators from logprep.abc.processor import Processor from logprep.processor.labeler.labeling_schema import LabelingSchema @@ -41,7 +41,7 @@ class Labeler(Processor): class Config(Processor.Config): """Labeler Configurations""" - schema: str = field(validator=[validators.instance_of(str)]) + schema: str = field(validator=(validators.instance_of(str))) """Path to a labeling schema file. For string format see :ref:`getters`.""" include_parent_labels: Optional[bool] = field( default=False, validator=validators.optional(validator=validators.instance_of(bool)) diff --git a/logprep/processor/pre_detector/rule.py b/logprep/processor/pre_detector/rule.py index 6e63ddafd..832fc6e98 100644 --- a/logprep/processor/pre_detector/rule.py +++ b/logprep/processor/pre_detector/rule.py @@ -88,7 +88,7 @@ ip_fields: - some_ip_field -The pre_detector also has the option to normalize the timestamp. +The pre_detector also has the option to normalize the timestamp. To configure this the following parameters can be set in the rule configuration. .. code-block:: yaml @@ -110,7 +110,7 @@ target_timezone: description: Some malicious event. -All of these new parameters are configurable and default to +All of these new parameters are configurable and default to standard values if not explicitly set. .. autoclass:: logprep.processor.pre_detector.rule.PreDetectorRule.Config @@ -176,11 +176,11 @@ class Config(Rule.Config): # pylint: disable=too-many-instance-attributes timestamp_field: str = field(validator=validators.instance_of(str), default="@timestamp") """the field which has the given timestamp to be normalized defaults to :code:`@timestamp`""" source_timezone: ZoneInfo = field( - validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + validator=(validators.instance_of(ZoneInfo)), converter=ZoneInfo, default="UTC" ) """ timezone of source_fields defaults to :code:`UTC`""" target_timezone: ZoneInfo = field( - validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + validator=(validators.instance_of(ZoneInfo)), converter=ZoneInfo, default="UTC" ) """ timezone for target_field defaults to :code:`UTC`""" failure_tags: list = field( diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index 735660425..29523d88c 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -115,7 +115,7 @@ class Config(FieldManager.Config): * /var/git/logprep-rules/pseudonymizer_rules/regex_mapping.json """ max_cached_pseudonyms: int = field( - validator=[validators.instance_of(int), validators.gt(0)] + validator=(validators.instance_of(int), validators.gt(0)) ) """ The maximum number of cached pseudonyms. One cache entry requires ~250 Byte, thus 10 @@ -127,12 +127,12 @@ class Config(FieldManager.Config): entry is deleted. Has to be greater than 0. """ max_cached_pseudonymized_urls: int = field( - validator=[validators.instance_of(int), validators.gt(0)], default=10000 + validator=(validators.instance_of(int), validators.gt(0)), default=10000 ) """The maximum number of cached pseudonymized urls. Default is 10000. Behaves similarly to the max_cached_pseudonyms. Has to be greater than 0.""" mode: str = field( - validator=[validators.instance_of(str), validators.in_(("GCM", "CTR"))], default="GCM" + validator=(validators.instance_of(str), validators.in_(("GCM", "CTR"))), default="GCM" ) """Optional mode of operation for the encryption. Can be either 'GCM' or 'CTR'. Default is 'GCM'. diff --git a/logprep/processor/requester/rule.py b/logprep/processor/requester/rule.py index 0ba2fb0ac..2ed98bc17 100644 --- a/logprep/processor/requester/rule.py +++ b/logprep/processor/requester/rule.py @@ -150,7 +150,7 @@ class Config(FieldManagerRule.Config): ) """ (Optional) The http headers as dictionary.""" auth: tuple = field( - validator=[validators.instance_of(tuple)], + validator=(validators.instance_of(tuple)), converter=tuple, factory=tuple, ) diff --git a/logprep/processor/timestamper/rule.py b/logprep/processor/timestamper/rule.py index f7f9db8f1..499bd4d7c 100644 --- a/logprep/processor/timestamper/rule.py +++ b/logprep/processor/timestamper/rule.py @@ -9,7 +9,7 @@ :caption: Given timestamper rule filter: "winlog.event_id: 123456789" - timestamper: + timestamper: source_fields: ["winlog.event_data.some_timestamp_utc"] target_field: "@timestamp" source_format: UNIX @@ -122,11 +122,11 @@ class Config(FieldManagerRule.Config): a tag :code:`_timestamper_failure` will be added to the event. """ source_timezone: ZoneInfo = field( - validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + validator=(validators.instance_of(ZoneInfo)), converter=ZoneInfo, default="UTC" ) """ timezone of source_fields. defaults to :code:`UTC`""" target_timezone: ZoneInfo = field( - validator=[validators.instance_of(ZoneInfo)], converter=ZoneInfo, default="UTC" + validator=(validators.instance_of(ZoneInfo)), converter=ZoneInfo, default="UTC" ) """ timezone for target_field. defaults to :code:`UTC`""" mapping: dict = field(default="", init=False, repr=False, eq=False) diff --git a/logprep/util/configuration.py b/logprep/util/configuration.py index e25a689bd..f53a5faa4 100644 --- a/logprep/util/configuration.py +++ b/logprep/util/configuration.py @@ -346,7 +346,7 @@ class LoggerConfig: The log level of the root logger should be set to :code:`INFO` or higher in production environments to avoid exposing sensitive information in the logs. """ - format: str = field(default="", validator=[validators.instance_of(str)], eq=False) + format: str = field(default="", validator=(validators.instance_of(str)), eq=False) """The format of the log message as supported by the :code:`LogprepFormatter`. Defaults to :code:`"%(asctime)-15s %(name)-10s %(levelname)-8s: %(message)s"`. @@ -354,7 +354,7 @@ class LoggerConfig: :no-index: """ - datefmt: str = field(default="", validator=[validators.instance_of(str)], eq=False) + datefmt: str = field(default="", validator=(validators.instance_of(str)), eq=False) """The date format of the log message. Defaults to :code:`"%Y-%m-%d %H:%M:%S"`.""" loggers: dict = field(validator=validators.instance_of(dict), factory=dict) """The loggers loglevel configuration. Defaults to: @@ -469,7 +469,7 @@ class Configuration: Because of that ensure that the configuration endpoint is always available. """ process_count: int = field( - validator=[validators.instance_of(int), validators.ge(1)], default=1, eq=False + validator=(validators.instance_of(int), validators.ge(1)), default=1, eq=False ) """Number of logprep processes to start. Defaults to :code:`1`.""" restart_count: int = field( @@ -478,7 +478,7 @@ class Configuration: """Number of restarts before logprep exits. Defaults to :code:`5`. If this value is set to a negative number, logprep will always restart immediately.""" timeout: float = field( - validator=[validators.instance_of(float), validators.gt(0)], default=5.0, eq=False + validator=(validators.instance_of(float), validators.gt(0)), default=5.0, eq=False ) """Logprep tries to react to signals (like sent by CTRL+C) within the given time. The time taken for some processing steps is not always predictable, thus it is not possible to diff --git a/logprep/util/processor_generator.py b/logprep/util/processor_generator.py index ddeb01ca9..5a2ec4f5a 100644 --- a/logprep/util/processor_generator.py +++ b/logprep/util/processor_generator.py @@ -6,14 +6,15 @@ generates boilerplate code to implement a new processor for logprep """ -from typing import Type from pathlib import Path -from attrs import field, validators, define +from typing import Type + +from attrs import define, field, validators from jinja2 import Template from logprep.abc.processor import Processor -from logprep.util.helper import snake_to_camel, camel_to_snake from logprep.registry import Registry +from logprep.util.helper import camel_to_snake, snake_to_camel PROCESSOR_BASE_PATH = "logprep/processor" PROCESSOR_UNIT_TEST_BASE_PATH = "tests/unit/processor" @@ -60,7 +61,7 @@ class ProcessorCodeGenerator: """ name: str = field( - validator=[validators.instance_of(str), validators.matches_re(r"[A-Za-z_]+")], + validator=(validators.instance_of(str), validators.matches_re(r"[A-Za-z_]+")), converter=camel_to_snake, ) diff --git a/pyproject.toml b/pyproject.toml index 3ce6f4e66..2d8f92323 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ dev = [ "pytest-asyncio", "pre-commit", "mypy", + "types-requests" ] doc = [ @@ -161,3 +162,5 @@ environment = 'PATH=$HOME/.cargo/bin:$PATH' [tool.mypy] exclude = ["tests"] +follow_imports = "skip" +follow_imports_for_stubs = "True"