Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Add throttle_protection plugin #799

Merged
merged 7 commits into from
Nov 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ subtitles

-------------------------------------------------------------------------------

throttle_protection
'''''''''''''''''''
.. autoclass:: ytdl_sub.plugins.throttle_protection.ThrottleProtectionOptions()
   :members:
   :member-order: bysource

-------------------------------------------------------------------------------

video_tags
''''''''''
.. autoclass:: ytdl_sub.plugins.video_tags.VideoTagsOptions()
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ disable = [
"R0913", # Too many arguments
"R0901", # too-many-ancestors
"R0902", # too-many-instance-attributes
"R1711", # useless-return
"W0511", # TODO
]

Expand Down
17 changes: 11 additions & 6 deletions src/ytdl_sub/cli/parsers/dl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import re
import shlex
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple
Expand Down Expand Up @@ -116,7 +117,7 @@ def _find_largest_consecutive(cls, indices: List[int]) -> int:
return largest_consecutive + 1

@classmethod
def _argument_name_and_value_to_dict(cls, arg_name: str, arg_value: str) -> Dict:
def _argument_name_and_value_to_dict(cls, arg_name: str, arg_value: Any) -> Dict:
"""
:param arg_name: Argument name in the form of 'key1.key2.key3'
:param arg_value: Argument value
Expand All @@ -134,11 +135,15 @@ def _argument_name_and_value_to_dict(cls, arg_name: str, arg_value: str) -> Dict

next_dict[arg_name_split[-1]] = arg_value

# TODO: handle ints/floats
if arg_value == "True":
next_dict[arg_name_split[-1]] = True
elif arg_value == "False":
next_dict[arg_name_split[-1]] = False
if isinstance(arg_value, str):
if arg_value == "True":
next_dict[arg_name_split[-1]] = True
elif arg_value == "False":
next_dict[arg_name_split[-1]] = False
elif arg_value.isdigit():
next_dict[arg_name_split[-1]] = int(arg_value)
elif arg_value.replace(".", "", 1).isdigit():
next_dict[arg_name_split[-1]] = float(arg_value)

return argument_dict

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ytdl_sub.plugins.regex import RegexPlugin
from ytdl_sub.plugins.split_by_chapters import SplitByChaptersPlugin
from ytdl_sub.plugins.subtitles import SubtitlesPlugin
from ytdl_sub.plugins.throttle_protection import ThrottleProtectionPlugin
from ytdl_sub.plugins.video_tags import VideoTagsPlugin


Expand All @@ -41,6 +42,7 @@ class PluginMapping:
"subtitles": SubtitlesPlugin,
"chapters": ChaptersPlugin,
"split_by_chapters": SplitByChaptersPlugin,
"throttle_protection": ThrottleProtectionPlugin,
}

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/ytdl_sub/config/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from ytdl_sub.config.config_validator import ConfigValidator
from ytdl_sub.config.plugin import Plugin
from ytdl_sub.config.preset_class_mappings import PluginMapping
from ytdl_sub.config.plugin_mapping import PluginMapping
from ytdl_sub.config.preset_options import OptionsValidator
from ytdl_sub.config.preset_options import OutputOptions
from ytdl_sub.config.preset_options import Overrides
Expand Down
233 changes: 233 additions & 0 deletions src/ytdl_sub/plugins/throttle_protection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
import random
import time
from typing import List
from typing import Optional
from typing import Tuple

from ytdl_sub.config.plugin import Plugin
from ytdl_sub.config.preset_options import OptionsDictValidator
from ytdl_sub.config.preset_options import Overrides
from ytdl_sub.entries.entry import Entry
from ytdl_sub.utils.file_handler import FileMetadata
from ytdl_sub.utils.logger import Logger
from ytdl_sub.validators.strict_dict_validator import StrictDictValidator
from ytdl_sub.validators.validators import FloatValidator
from ytdl_sub.validators.validators import ProbabilityValidator
from ytdl_sub.ytdl_additions.enhanced_download_archive import EnhancedDownloadArchive

# Module-level logger used by the classes below, obtained under the name "throttle-protection"
logger = Logger.get("throttle-protection")


class RandomizedRangeValidator(StrictDictValidator):
    """
    Validator to specify a float range between ``min`` and ``max``.

    ``max`` is required. ``min`` is optional and defaults to ``0.0``. ``min`` must not be
    negative, and ``max`` must be greater than or equal to ``min``.
    """

    _required_keys = {"max"}
    _optional_keys = {"min"}

    def __init__(self, name, value):
        super().__init__(name, value)

        self._max = self._validate_key(key="max", validator=FloatValidator).value
        self._min = self._validate_key_if_present(
            key="min", validator=FloatValidator, default=0.0
        ).value

        # Zero is an accepted lower bound (it is also the default); only negatives are rejected
        if self._min < 0:
            raise self._validation_exception("min must be greater than or equal to zero")

        if self._max < self._min:
            raise self._validation_exception(
                f"max ({self._max}) must be greater than or equal to min ({self._min})"
            )

    def randomized_float(self) -> float:
        """
        Returns
        -------
        A random float within the range. Drawn with ``random.uniform``, so the upper bound
        may or may not be included depending on floating-point rounding.
        """
        return random.uniform(self._min, self._max)

    def randomized_int(self) -> int:
        """
        Returns
        -------
        A random float within the range, then cast to an integer (floored, since the range
        is never negative)
        """
        return int(self.randomized_float())


class ThrottleProtectionOptions(OptionsDictValidator):
    """
    Provides options to make ytdl-sub look more 'human-like' to protect from throttling. For
    range-based values, a random number will be chosen within the range to avoid sleeps looking
    scripted.

    Usage:

    .. code-block:: yaml

       presets:
         my_example_preset:
           throttle_protection:
             sleep_per_download_s:
               min: 2.2
               max: 10.8
             sleep_per_subscription_s:
               min: 9.0
               max: 14.1
             max_downloads_per_subscription:
               min: 10
               max: 36
             subscription_download_probability: 1.0
    """

    _optional_keys = {
        "sleep_per_download_s",
        "sleep_per_subscription_s",
        "max_downloads_per_subscription",
        "subscription_download_probability",
    }

    def __init__(self, name, value):
        super().__init__(name, value)

        # Every option is optional; keys are validated in the same order they are documented
        validate_optional = self._validate_key_if_present

        self._sleep_per_download_s: Optional[RandomizedRangeValidator] = validate_optional(
            key="sleep_per_download_s",
            validator=RandomizedRangeValidator,
        )
        self._sleep_per_subscription_s: Optional[RandomizedRangeValidator] = validate_optional(
            key="sleep_per_subscription_s",
            validator=RandomizedRangeValidator,
        )
        self._max_downloads_per_subscription: Optional[
            RandomizedRangeValidator
        ] = validate_optional(
            key="max_downloads_per_subscription",
            validator=RandomizedRangeValidator,
        )
        self._subscription_download_probability: Optional[
            ProbabilityValidator
        ] = validate_optional(
            key="subscription_download_probability",
            validator=ProbabilityValidator,
        )

    @property
    def sleep_per_download_s(self) -> Optional[RandomizedRangeValidator]:
        """
        Range, in seconds, to sleep between each download. The time ytdl-sub spends on
        post-processing is not counted towards this sleep.
        """
        return self._sleep_per_download_s

    @property
    def sleep_per_subscription_s(self) -> Optional[RandomizedRangeValidator]:
        """
        Range, in seconds, to sleep between each subscription.
        """
        return self._sleep_per_subscription_s

    @property
    def max_downloads_per_subscription(self) -> Optional[RandomizedRangeValidator]:
        """
        Range for the number of downloads to perform per subscription.
        """
        return self._max_downloads_per_subscription

    @property
    def subscription_download_probability(self) -> Optional[ProbabilityValidator]:
        """
        Probability to perform any downloads, recomputed for each subscription. Setting this
        is only recommended when ytdl-sub runs in a cron-job, so that over time you are
        statistically guaranteed to eventually download the subscription.
        """
        return self._subscription_download_probability


class ThrottleProtectionPlugin(Plugin[ThrottleProtectionOptions]):
    """
    Plugin that applies the ``throttle_protection`` options: randomized sleeps after each
    download and each subscription, a randomized cap on downloads per subscription, and a
    probability roll that can skip a subscription's downloads entirely.
    """

    plugin_options_type = ThrottleProtectionOptions

    def __init__(
        self,
        options: ThrottleProtectionOptions,
        overrides: Overrides,
        enhanced_download_archive: EnhancedDownloadArchive,
    ):
        super().__init__(options, overrides, enhanced_download_archive)
        self._subscription_download_counter: int = 0

        # If subscriptions have a max download limit, roll it here for the first subscription.
        # Stays None when the option is not configured, which disables the limit.
        self._subscription_max_downloads: Optional[int] = self._randomized_max_downloads()

    def _randomized_max_downloads(self) -> Optional[int]:
        """
        Returns
        -------
        A freshly rolled max download count if max_downloads_per_subscription is configured,
        otherwise None.
        """
        max_downloads = self.plugin_options.max_downloads_per_subscription
        if max_downloads:
            # Must be *called*: storing the bound method instead of its result would break
            # the integer comparisons and %d log formatting that use this value
            return max_downloads.randomized_int()
        return None

    def ytdl_options_match_filters(self) -> Tuple[List[str], List[str]]:
        """
        Returns
        -------
        If subscription_download_probability, match-filters that will perform no downloads
        if it's rolled to not download. Otherwise, two empty filter lists.
        """
        perform_download: Tuple[List[str], List[str]] = [], []
        # Filter on a sentinel title. NOTE(review): presumably no real entry has this title,
        # so nothing gets downloaded — confirm how the caller uses the second list.
        do_not_perform_download: Tuple[List[str], List[str]] = [], [
            "title = __YTDL_SUB_THROTTLE_PROTECTION_ON_SUBSCRIPTION_DOWNLOAD__"
        ]

        if self.plugin_options.subscription_download_probability:
            proba = self.plugin_options.subscription_download_probability.value
            # random.random() is always < 1, so a probability of 1.0 can never reach the skip
            if random.random() > proba:
                logger.info(
                    "Subscription download probability of %f missed, skipping this subscription",
                    proba,
                )
                return do_not_perform_download

        return perform_download

    def modify_entry_metadata(self, entry: Entry) -> Optional[Entry]:
        """
        Returns
        -------
        The entry unchanged while under the subscription's max downloads, otherwise None.
        NOTE(review): None presumably tells the caller to skip the entry — confirm against
        the Plugin base class.
        """
        if (
            self._subscription_max_downloads is not None
            and self._subscription_download_counter >= self._subscription_max_downloads
        ):
            if self._subscription_download_counter == self._subscription_max_downloads:
                logger.info(
                    "Reached subscription max downloads of %d for throttle protection",
                    self._subscription_max_downloads,
                )
                self._subscription_download_counter += 1  # increment to only print once

            return None

        return entry

    def post_process_entry(self, entry: Entry) -> Optional[FileMetadata]:
        """
        Counts the download towards the subscription's limit and, if sleep_per_download_s is
        configured, sleeps for a random duration within its range.

        Returns
        -------
        Always None
        """
        if (
            self._subscription_max_downloads is not None
            and self._subscription_download_counter == 0
        ):
            # Only logged on the subscription's first processed entry
            logger.debug(
                "Setting subscription max downloads to %d", self._subscription_max_downloads
            )

        # Increment the counter
        self._subscription_download_counter += 1

        if self.plugin_options.sleep_per_download_s:
            sleep_time = self.plugin_options.sleep_per_download_s.randomized_float()
            logger.debug("Sleeping between downloads for %0.2f seconds", sleep_time)
            time.sleep(sleep_time)

        return None

    def post_process_subscription(self):
        """
        Resets the per-subscription state for the next subscription and, if
        sleep_per_subscription_s is configured, sleeps for a random duration within its range.
        """
        # Reset counter to 0 for the next subscription
        self._subscription_download_counter = 0

        # If present, re-roll max downloads for the next subscription (stays None otherwise)
        self._subscription_max_downloads = self._randomized_max_downloads()

        if self.plugin_options.sleep_per_subscription_s:
            sleep_time = self.plugin_options.sleep_per_subscription_s.randomized_float()
            logger.debug("Sleeping between subscriptions for %0.2f seconds", sleep_time)
            time.sleep(sleep_time)
9 changes: 9 additions & 0 deletions src/ytdl_sub/validators/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@
_expected_value_type_name = "int"


class ProbabilityValidator(FloatValidator):
    """
    Validates a float that must be a probability, i.e. within 0 and 1.0 (both inclusive).
    """

    _expected_value_type_name = "probability"

    def __init__(self, name, value):
        super().__init__(name, value)
        # Written as a positive chained comparison so that NaN, for which every ordering
        # comparison is False, is rejected rather than slipping past two negative checks
        if not 0 <= self.value <= 1:
            raise self._validation_exception("Probabilities must be between 0 and 1.0")

Check warning on line 158 in src/ytdl_sub/validators/validators.py

View check run for this annotation

Codecov / codecov/patch

src/ytdl_sub/validators/validators.py#L158

Added line #L158 was not covered by tests


class ListValidator(Validator, ABC, Generic[ValidatorT]):
"""
Validates a list of objects to validate
Expand Down
21 changes: 16 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from unittest.mock import patch

import pytest
Expand Down Expand Up @@ -78,11 +79,17 @@ def reformat_directory() -> Path:


@contextlib.contextmanager
def assert_logs(logger: logging.Logger, expected_message: str, log_level: str = "debug"):
def assert_logs(
logger: logging.Logger,
expected_message: str,
log_level: str = "debug",
expected_occurrences: Optional[int] = None,
):
"""
Patches any function, but calls the original function.
Intended to see if the particular function is called.
"""
occurrences = 0
debug_logger = Logger.get()

def _wrapped_debug(*args, **kwargs):
Expand All @@ -92,10 +99,14 @@ def _wrapped_debug(*args, **kwargs):
yield

for call_args in patched_debug.call_args_list:
if expected_message in call_args.args[0]:
return

assert False, f"{expected_message} was not found in a logger.debug call"
occurrences += int(expected_message in call_args.args[0])

if expected_occurrences:
assert (
occurrences == expected_occurrences
), f"{expected_message} was expected {expected_occurrences} times, got {occurrences}"
else:
assert occurrences > 0, f"{expected_message} was not found in a logger.debug call"


def preset_dict_to_dl_args(preset_dict: Dict) -> str:
Expand Down
1 change: 0 additions & 1 deletion tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from ytdl_sub.cli.entrypoint import main
from ytdl_sub.subscriptions.subscription import Subscription
from ytdl_sub.utils.file_handler import FileHandler
from ytdl_sub.utils.system import IS_WINDOWS


@pytest.fixture
Expand Down
Loading
Loading