diff --git a/docker-compose.yml b/docker-compose.yml
index cc7b924..422e872 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -134,9 +134,9 @@ services:
     environment:
       - RUN_ENV=production
       - db_user=ingest
-      - LOG_LEVEL=ERROR
+      - LOG_LEVEL=INFO
      - CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.ingest_tasks
-    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=3,16
+    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=16,1

  link_repost_worker:
    container_name: link-repost-worker
diff --git a/docs/dev_docs/modifying_monitored_sub.md b/docs/dev_docs/modifying_monitored_sub.md
new file mode 100644
index 0000000..5d3e88d
--- /dev/null
+++ b/docs/dev_docs/modifying_monitored_sub.md
@@ -0,0 +1,5 @@
+
+### Adding or Removing Config Values
+* Add / remove the config value in core/db/databasemodels.py
+* Add / remove the matching default in core/util/default_bot_config.py
+* Update sub_monitor_exposed_config_options in the config json
\ No newline at end of file
diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
index c686307..3308dd5 100644
--- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
@@ -57,35 +57,40 @@ def update_proxies(uowm: UnitOfWorkManager) -> None:
         )
         uow.commit()

-def update_top_reposts(uowm: UnitOfWorkManager):
+def update_top_reposts(uow: UnitOfWork, post_type_id: int, day_range: int = None):
     # reddit.info(reddit_ids_to_lookup):
-    post_types = [2, 3]
-    day_ranges = [1, 7, 14, 30, 365, None]
+    log.info('Getting top reposts for post type %s with range %s', post_type_id, day_range)
     range_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE detected_at > NOW() - INTERVAL :days DAY AND post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
     all_time_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
-    with uowm.start() as uow:
-        for post_type in post_types:
-            for days in day_ranges:
-                log.info('Getting top reposts for post type %s with range %s', post_type, days)
-                if days:
-                    query = range_query
-                else:
-                    query = all_time_query
-                uow.session.execute(
-                    text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
-                    {'posttype': post_type, 'days': days})
-                uow.commit()
-                result = uow.session.execute(text(query), {'posttype': post_type, 'days': days})
-                for row in result:
-                    stat = StatsTopRepost()
-                    stat.post_id = row[0]
-                    stat.post_type_id = post_type
-                    stat.day_range = days
-                    stat.repost_count = row[1]
-                    stat.updated_at = func.utc_timestamp()
-                    stat.nsfw = False
-                    uow.stat_top_repost.add(stat)
-                uow.commit()
+    if day_range:
+        query = range_query
+        uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
+                            {'posttype': post_type_id, 'days': day_range})
+    else:
+        query = all_time_query
+        uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range IS NULL'),
+                            {'posttype': post_type_id})
+
+    uow.commit()
+
+    result = uow.session.execute(text(query), {'posttype': post_type_id, 'days': day_range})
+    for row in result:
+        stat = StatsTopRepost()
+        stat.post_id = row[0]
+        stat.post_type_id = post_type_id
+        stat.day_range = day_range
+        stat.repost_count = row[1]
+        stat.updated_at = func.utc_timestamp()
+        stat.nsfw = False
+        uow.stat_top_repost.add(stat)
+    uow.commit()
+
+def run_update_top_reposts(uow: UnitOfWork) -> None:
+    post_types = [1, 2, 3]
+    day_ranges = [1, 7, 14, 30, None]
+    for post_type_id in post_types:
+        for days in day_ranges:
+            update_top_reposts(uow, post_type_id, days)

 def update_top_reposters(uow: UnitOfWork, post_type_id: int, day_range: int = None) -> None:
     log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range)
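
Reviewer note: the refactor above splits the old do-everything loop into a per-slice `update_top_reposts` plus a `run_update_top_reposts` driver, so a single type/range slice can be rebuilt on demand. A minimal sketch of how a caller can drive either entry point; building the `UnitOfWorkManager` from `get_db_engine(Config())` mirrors the repo's usual wiring but is an assumption here, not part of this diff.

```python
from redditrepostsleuth.core.celery.task_logic.scheduled_task_logic import (
    run_update_top_reposts, update_top_reposts
)
from redditrepostsleuth.core.config import Config
from redditrepostsleuth.core.db.db_utils import get_db_engine
from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager

uowm = UnitOfWorkManager(get_db_engine(Config()))  # assumed wiring
with uowm.start() as uow:
    run_update_top_reposts(uow)    # rebuild every post type / day range slice
    update_top_reposts(uow, 2, 7)  # or just image posts (type 2) over 7 days
```
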
diff --git a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
index 37ce7c8..a2b86bb 100644
--- a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
@@ -12,7 +12,7 @@

 @celery.task(bind=True, base=SqlAlchemyTask, ignore_reseults=True, serializer='pickle',
              autoretry_for=(ConnectionError,ImageConversionException,GalleryNotProcessed), retry_kwargs={'max_retries': 10, 'countdown': 300})
-def save_new_post(self, submission: dict):
+def save_new_post(self, submission: dict, repost_check: bool = True):

     # TODO: temp fix until I can fix imgur gifs
     if 'imgur' in submission['url'] and 'gifv' in submission['url']:
@@ -48,21 +48,22 @@ def save_new_post(self, submission: dict):
         log.exception('Database save failed: %s', str(e), exc_info=False)
         return

-    if post.post_type_id == 1:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
-    elif post.post_type_id == 2:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
-    elif post.post_type_id == 3:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])
+    if repost_check:
+        if post.post_type_id == 1:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
+        elif post.post_type_id == 2:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
+        elif post.post_type_id == 3:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])

     celery.send_task('redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans', args=[post.author])


 @celery.task
-def save_new_posts(posts: list[dict]) -> None:
+def save_new_posts(posts: list[dict], repost_check: bool = True) -> None:
     for post in posts:
-        save_new_post.apply_async((post,))
+        save_new_post.apply_async((post, repost_check))


 @celery.task(bind=True, base=SqlAlchemyTask, ignore_results=True)
 def save_pushshift_results(self, data):
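
Reviewer note: the new `repost_check` flag lets bulk ingestion save posts without fanning out the per-type repost tasks. A minimal sketch of a backfill caller, assuming it is useful for historical loads where repost checks would be wasted work; the payload shape is abbreviated and the call site is hypothetical.

```python
from redditrepostsleuth.core.celery.tasks.ingest_tasks import save_new_posts

backfill_batch = [
    {'id': 'abc123', 'url': 'https://i.redd.it/example.jpg'},  # truncated submission dict
]
# Second tuple element maps to repost_check; False saves each post
# without dispatching check_image_repost_save and friends.
save_new_posts.apply_async((backfill_batch, False))
```
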
diff --git a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
index 61c5af3..7f2b117 100644
--- a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
@@ -9,7 +9,7 @@
 from redditrepostsleuth.core.celery import celery
 from redditrepostsleuth.core.celery.basetasks import RedditTask, SqlAlchemyTask, AdminTask
 from redditrepostsleuth.core.celery.task_logic.scheduled_task_logic import update_proxies, update_top_reposts, \
-    token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data
+    token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data, run_update_top_reposts
 from redditrepostsleuth.core.db.databasemodels import MonitoredSub, StatsDailyCount
 from redditrepostsleuth.core.logging import configure_logger
 from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions
@@ -178,6 +178,13 @@ def update_daily_stats(self):
         log.exception('Problem updating stats')


+@celery.task(bind=True, base=SqlAlchemyTask)
+def update_all_top_reposts_task(self):
+    try:
+        with self.uowm.start() as uow:
+            run_update_top_reposts(uow)
+    except Exception as e:
+        log.exception('Unknown task error')

 @celery.task(bind=True, base=SqlAlchemyTask)
 def update_all_top_reposters_task(self):
diff --git a/redditrepostsleuth/core/config.py b/redditrepostsleuth/core/config.py
index 2718c0d..9d39502 100644
--- a/redditrepostsleuth/core/config.py
+++ b/redditrepostsleuth/core/config.py
@@ -214,7 +214,7 @@ def _initialize_attributes(self):
             'default_text_crosspost_filter',
             'default_text_max_days_old_filter',
             'default_text_target_distance',
-            'discord_logging_hook'
+            'discord_logging_hook',
         ]
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 1de399e..93741f1 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -47,6 +47,7 @@ def __repr__(self) -> str:
     reports = relationship('UserReport', back_populates='post')
     hashes = relationship('PostHash', back_populates='post')
     post_type = relationship('PostType')  # lazy has to be set so JSON encoders don't fail for unbound session
+    #post_type = relationship('PostType', lazy='joined')

     def to_dict(self):
         return {
@@ -198,7 +199,7 @@ class RepostSearch(Base):
         Index('idx_post_type_searched_at', 'post_type_id', 'searched_at'),
         Index('idx_by_subreddit_and_type', 'subreddit', 'source', 'post_type_id', 'matches_found'),
         Index('idx_source', 'source'),
-        Index('idx_matches_found', 'matches_found')
+        Index('idx_matches_found', 'searched_at', 'source', 'matches_found')
     )
     id = Column(Integer, primary_key=True)
     post_id = Column(Integer, ForeignKey('post.id'))
@@ -351,10 +352,14 @@ class MonitoredSub(Base):
     adult_promoter_remove_post = Column(Boolean, default=False)
     adult_promoter_ban_user = Column(Boolean, default=False)
     adult_promoter_notify_mod_mail = Column(Boolean, default=False)
+    adult_promoter_removal_reason = Column(String(300))
+    adult_promoter_ban_reason = Column(String(300))
     high_volume_reposter_ban_user = Column(Boolean, default=False)
     high_volume_reposter_remove_post = Column(Boolean, default=False)
     high_volume_reposter_threshold = Column(Integer, default=100)
     high_volume_reposter_notify_mod_mail = Column(Boolean, default=False)
+    high_volume_reposter_removal_reason = Column(String(300))
+    high_volume_reposter_ban_reason = Column(String(300))

     post_checks = relationship("MonitoredSubChecks", back_populates='monitored_sub', cascade='all, delete', )
     config_revisions = relationship("MonitoredSubConfigRevision", back_populates='monitored_sub', cascade='all, delete')
@@ -422,7 +427,12 @@ def to_dict(self):
             'high_volume_reposter_ban_user': self.high_volume_reposter_ban_user,
             'high_volume_reposter_remove_post': self.high_volume_reposter_remove_post,
             'high_volume_reposter_threshold': self.high_volume_reposter_threshold,
-            'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail
+            'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail,
+            'high_volume_reposter_removal_reason': self.high_volume_reposter_removal_reason,
+            'high_volume_reposter_ban_reason': self.high_volume_reposter_ban_reason,
+            'adult_promoter_removal_reason': self.adult_promoter_removal_reason,
+            'adult_promoter_ban_reason': self.adult_promoter_ban_reason
         }
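
Reviewer note: the four new nullable `String(300)` columns on `MonitoredSub` ship without a migration in this diff. A sketch of equivalent DDL, assuming the `monitored_sub` table name implied by the model and a MySQL backend (suggested by the pymysql pin and the `NOW()` queries above); if the project used Alembic, that would be the more usual home for this.

```python
from sqlalchemy import text

from redditrepostsleuth.core.config import Config
from redditrepostsleuth.core.db.db_utils import get_db_engine

engine = get_db_engine(Config())  # assumed wiring, as above
with engine.begin() as conn:
    for col in (
        'adult_promoter_removal_reason',
        'adult_promoter_ban_reason',
        'high_volume_reposter_removal_reason',
        'high_volume_reposter_ban_reason',
    ):
        # Matches Column(String(300)) with no default: nullable VARCHAR(300)
        conn.execute(text(f'ALTER TABLE monitored_sub ADD COLUMN {col} VARCHAR(300) NULL'))
```
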
diff --git a/redditrepostsleuth/core/model/misc_models.py b/redditrepostsleuth/core/model/misc_models.py
index c883bf9..89fde2e 100644
--- a/redditrepostsleuth/core/model/misc_models.py
+++ b/redditrepostsleuth/core/model/misc_models.py
@@ -11,6 +11,7 @@ class JobStatus(Enum):
     TIMEOUT = auto()
     PROXYERROR = auto()
     ERROR = auto()
+    RATELIMIT = auto()

 @dataclass
 class BatchedPostRequestJob:
diff --git a/redditrepostsleuth/core/services/responsebuilder.py b/redditrepostsleuth/core/services/responsebuilder.py
index 5cc12b7..17d3fef 100644
--- a/redditrepostsleuth/core/services/responsebuilder.py
+++ b/redditrepostsleuth/core/services/responsebuilder.py
@@ -130,7 +130,7 @@ def build_sub_comment(

         try:
             return self.build_default_comment(search_results, message, **kwargs)
-        except KeyError:
+        except KeyError as e:
             log.warning('Custom repost template for %s has a bad slug: %s', monitored_sub.name, monitored_sub.repost_response_template)
             return self.build_default_comment(search_results, **kwargs)
diff --git a/redditrepostsleuth/core/util/default_bot_config.py b/redditrepostsleuth/core/util/default_bot_config.py
index 6a6977a..4211a00 100644
--- a/redditrepostsleuth/core/util/default_bot_config.py
+++ b/redditrepostsleuth/core/util/default_bot_config.py
@@ -33,9 +33,13 @@
     "adult_promoter_remove_post": False,
     "adult_promoter_ban_user": False,
     "adult_promoter_notify_mod_mail": False,
+    "adult_promoter_ban_reason": None,
+    "adult_promoter_removal_reason": None,
     "high_volume_reposter_ban_user": False,
     "high_volume_reposter_remove_post": False,
     "high_volume_reposter_threshold": 150,
     "high_volume_reposter_notify_mod_mail": False,
+    "high_volume_reposter_removal_reason": None,
+    "high_volume_reposter_ban_reason": None
 }
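
Reviewer note: these default-config additions are step two of the three-step checklist in docs/dev_docs/modifying_monitored_sub.md. A sketch of a guard test for the first two steps; `DEFAULT_CONFIG_VALUES` is an assumed name for this module's dict, and the third step (`sub_monitor_exposed_config_options` in the config json) is not covered here.

```python
from redditrepostsleuth.core.db.databasemodels import MonitoredSub
from redditrepostsleuth.core.util.default_bot_config import DEFAULT_CONFIG_VALUES  # assumed name


def test_default_config_keys_match_monitored_sub_columns():
    # Every default bot config key should map to a MonitoredSub column,
    # otherwise one half of the checklist was missed.
    columns = {c.name for c in MonitoredSub.__table__.columns}
    missing = set(DEFAULT_CONFIG_VALUES) - columns
    assert not missing, f'Config keys without a matching column: {missing}'
```
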
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 1ff762e..8f17965 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -3,7 +3,7 @@
 import json
 import os
 import time
-from asyncio import ensure_future, gather, run, TimeoutError
+from asyncio import ensure_future, gather, run, TimeoutError, CancelledError
 from datetime import datetime
 from typing import List, Optional

@@ -15,6 +15,7 @@
 from redditrepostsleuth.core.db.databasemodels import Post
 from redditrepostsleuth.core.db.db_utils import get_db_engine
 from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
+from redditrepostsleuth.core.exception import RateLimitException, UtilApiException
 from redditrepostsleuth.core.logging import configure_logger
 from redditrepostsleuth.core.model.misc_models import BatchedPostRequestJob, JobStatus
 from redditrepostsleuth.core.util.helpers import get_reddit_instance, get_newest_praw_post_id, get_next_ids, \
@@ -36,6 +37,7 @@
 config = Config()

 REMOVAL_REASONS_TO_SKIP = ['deleted', 'author', 'reddit', 'copyright_takedown']
+HEADERS = {'User-Agent': 'u/RepostSleuthBot - Submission Ingest (by u/BarryCarey)'}


 async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
@@ -45,12 +47,21 @@ async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
     :param session: AIOHttp session to use
     :return: raw response from request
     """
-    async with session.get(url, timeout=ClientTimeout(total=10)) as resp:
+    log.debug('Page fetch')
+
+    async with session.get(url, timeout=ClientTimeout(total=10), headers=HEADERS) as resp:
         try:
             if resp.status == 200:
                 log.debug('Successful fetch')
-                return await resp.text()
+                try:
+                    return await resp.text()
+                except CancelledError:
+                    log.error('Canceled on getting text')
+                    raise UtilApiException('Canceled')
             else:
+                if resp.status == 429:
+                    text = await resp.text()
+                    raise RateLimitException('Data API rate limit')
                 log.info('Unexpected request status %s - %s', resp.status, url)
                 return
         except (ClientOSError, TimeoutError):
@@ -68,11 +79,15 @@ async def fetch_page_as_job(job: BatchedPostRequestJob, session: ClientSession)
     :rtype: BatchedPostRequestJob
     """
     try:
-        async with session.get(job.url, timeout=ClientTimeout(total=10)) as resp:
+        async with session.get(job.url, timeout=ClientTimeout(total=10), headers=HEADERS) as resp:
             if resp.status == 200:
                 log.debug('Successful fetch')
                 job.status = JobStatus.SUCCESS
+                log.debug('Fetching response text')
                 job.resp_data = await resp.text()
+            elif resp.status == 429:
+                log.warning('Data API Rate Limit')
+                job.status = JobStatus.RATELIMIT
             else:
                 log.warning('Unexpected request status %s - %s', resp.status, job.url)
                 job.status = JobStatus.ERROR
@@ -106,7 +121,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:

     tasks = []
     conn = TCPConnector(limit=0)
-    async with ClientSession(connector=conn) as session:
+    async with ClientSession(connector=conn, headers=HEADERS) as session:
         while True:
             try:
                 chunk = list(itertools.islice(missing_ids, 100))
@@ -114,6 +129,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
                 break

             url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(chunk)}'
+            #url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
             job = BatchedPostRequestJob(url, chunk, JobStatus.STARTED)
             tasks.append(ensure_future(fetch_page_as_job(job, session)))
             if len(tasks) >= 50 or len(chunk) == 0:
@@ -139,10 +155,15 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
                     else:
                         tasks.append(ensure_future(fetch_page_as_job(j, session)))

+                any_rate_limit = next((x for x in results if x.status == JobStatus.RATELIMIT), None)
+                if any_rate_limit:
+                    log.info('Some jobs hit data rate limit, waiting')
+                    await asyncio.sleep(10)
+
                 log.info('Sending %s posts to save queue', len(posts_to_save))
                 # save_new_posts.apply_async(([reddit_submission_to_post(submission) for submission in posts_to_save],))
-                save_new_posts.apply_async((posts_to_save,))
+                save_new_posts.apply_async((posts_to_save, True))

                 if len(chunk) == 0:
                     break
@@ -169,53 +190,61 @@ async def main() -> None:
         oldest_post = uow.posts.get_newest_post()
         oldest_id = oldest_post.post_id

-    await ingest_range(newest_id, oldest_id)
-    async with ClientSession() as session:
-        delay = 0
-        while True:
-            ids_to_get = get_next_ids(newest_id, 100)
-            url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
+    #await ingest_range(newest_id, oldest_id)
+
+    delay = 0
+    while True:
+        ids_to_get = get_next_ids(newest_id, 100)
+        #url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
+        url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
+        async with ClientSession(headers=HEADERS) as session:
             try:
+                log.debug('Sending fetch request')
                 results = await fetch_page(url, session)
-            except (ServerDisconnectedError, ClientConnectorError, ClientOSError, TimeoutError):
+            except (ServerDisconnectedError, ClientConnectorError, ClientOSError, TimeoutError, CancelledError, UtilApiException):
                 log.warning('Error during fetch')
                 await asyncio.sleep(2)
                 continue
-
-            if not results:
-                continue
+            except RateLimitException:
+                log.warning('Hit Data API Rate Limit')
+                await asyncio.sleep(10)
+                continue

-            res_data = json.loads(results)
-            if not res_data or not len(res_data['data']['children']):
-                log.info('No results')
-                continue
+        if not results:
+            log.debug('No results')
+            continue
+
+        res_data = json.loads(results)
+        if not res_data or not len(res_data['data']['children']):
+            log.info('No results')
+            continue
+
+        log.info('%s results returned from API', len(res_data['data']['children']))
+        if len(res_data['data']['children']) < 91:
+            delay += 1
+            log.debug('Delay increased by 1. Current delay: %s', delay)
+        else:
+            if delay > 0:
+                delay -= 1
+                log.debug('Delay decreased by 1. Current delay: %s', delay)
+
+        posts_to_save = []
+        for post in res_data['data']['children']:
+            if post['data']['removed_by_category'] in REMOVAL_REASONS_TO_SKIP:
+                continue
+            posts_to_save.append(post['data'])

-            log.info('%s results returned from API', len(res_data['data']['children']))
-            if len(res_data['data']['children']) < 90:
-                delay += 1
-                log.debug('Delay increased by 1. Current delay: %s', delay)
-            else:
-                if delay > 0:
-                    delay -= 1
-                    log.debug('Delay decreased by 1. Current delay: %s', delay)
-
-            posts_to_save = []
-            for post in res_data['data']['children']:
-                if post['data']['removed_by_category'] in REMOVAL_REASONS_TO_SKIP:
-                    continue
-                posts_to_save.append(post['data'])
-
-            log.info('Sending %s posts to save queue', len(posts_to_save))
-            # queue_posts_for_ingest([reddit_submission_to_post(submission) for submission in posts_to_save])
-            queue_posts_for_ingest(posts_to_save)
+        log.info('Sending %s posts to save queue', len(posts_to_save))
+        # queue_posts_for_ingest([reddit_submission_to_post(submission) for submission in posts_to_save])
+        queue_posts_for_ingest(posts_to_save)

-            ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp(
-                res_data['data']['children'][0]['data']['created_utc'])
-            log.info('Current Delay: %s', ingest_delay)
+        ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp(
+            res_data['data']['children'][0]['data']['created_utc'])
+        log.info('Current Delay: %s', ingest_delay)

-            newest_id = res_data['data']['children'][-1]['data']['id']
+        newest_id = res_data['data']['children'][-1]['data']['id']

-            time.sleep(delay)
+        time.sleep(delay)


 if __name__ == '__main__':
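
Reviewer note: one way to read the adaptive delay in main(): a batch returning fewer than 91 of the 100 requested IDs suggests ingest is outrunning new submissions, so the loop backs off; full batches slowly work the delay back down and it never goes negative. A toy distillation of that logic, not the service code itself:

```python
def next_delay(current_delay: int, batch_size: int) -> int:
    """Mirror the delay adjustment in ingestsvc main()."""
    if batch_size < 91:
        return current_delay + 1      # sparse batch -> we're ahead, slow down
    if current_delay > 0:
        return current_delay - 1      # full batch -> claw the delay back
    return current_delay


assert next_delay(0, 85) == 1
assert next_delay(3, 100) == 2
assert next_delay(0, 100) == 0
```
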
diff --git a/redditrepostsleuth/ingestsvc/requirements.txt b/redditrepostsleuth/ingestsvc/requirements.txt
index ca9da3f..1765168 100644
--- a/redditrepostsleuth/ingestsvc/requirements.txt
+++ b/redditrepostsleuth/ingestsvc/requirements.txt
@@ -5,6 +5,6 @@ sqlalchemy==2.0.20
 pymysql==1.1.0
 imagehash==4.3.1
 influxdb-client==1.37.0
-aiohttp==3.8.5
+aiohttp==3.9.0
 sentry-sdk==1.29.2
-cryptography==41.0.3
\ No newline at end of file
+cryptography==41.0.6
\ No newline at end of file
diff --git a/redditrepostsleuth/repostsleuthsiteapi/requirements.txt b/redditrepostsleuth/repostsleuthsiteapi/requirements.txt
index 00549ce..b3db7de 100644
--- a/redditrepostsleuth/repostsleuthsiteapi/requirements.txt
+++ b/redditrepostsleuth/repostsleuthsiteapi/requirements.txt
@@ -18,4 +18,4 @@ redlock==1.2.0
 celery[redis]==5.3.1
 pydantic==1.10.9
 sentry-sdk==1.29.2
-cryptography==41.0.3
\ No newline at end of file
+cryptography==41.0.6
\ No newline at end of file
diff --git a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
index 40e8e0d..99d897e 100644
--- a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
+++ b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
@@ -111,7 +111,11 @@ def handle_only_fans_check(
                 f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                 subject='Onlyfans Removal'
             )
-            self._remove_post(monitored_sub, self.reddit.submission(post.post_id))
+
+            self._remove_post(
+                monitored_sub.adult_promoter_removal_reason,
+                self.reddit.submission(post.post_id)
+            )

         if monitored_sub.adult_promoter_ban_user:
             if self.notification_svc:
@@ -119,7 +123,7 @@
                 f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                 subject='Onlyfans Ban Issued'
             )
-            self._ban_user(post.author, monitored_sub.name, user.notes)
+            self._ban_user(post.author, monitored_sub.name, monitored_sub.adult_promoter_ban_reason or user.notes)

         if monitored_sub.adult_promoter_notify_mod_mail:
             message_body = ADULT_PROMOTER_SUBMISSION_FOUND.format(
@@ -179,7 +183,10 @@ def handle_high_volume_reposter_check(
                 f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                 subject='High Volume Removal'
             )
-            self._remove_post(monitored_sub, self.reddit.submission(post.post_id))
+            self._remove_post(
+                monitored_sub.high_volume_reposter_removal_reason,
+                self.reddit.submission(post.post_id)
+            )

         if monitored_sub.high_volume_reposter_ban_user:
             if self.notification_svc:
@@ -187,7 +194,11 @@
                 f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                 subject='High Volume Reposter Ban Issued'
             )
-            self._ban_user(post.author, monitored_sub.name, 'High volume of reposts detected by Repost Sleuth')
+            self._ban_user(
+                post.author,
+                monitored_sub.name,
+                monitored_sub.high_volume_reposter_ban_reason or 'High volume of reposts detected by Repost Sleuth'
+            )

         if monitored_sub.high_volume_reposter_notify_mod_mail:
             message_body = HIGH_VOLUME_REPOSTER_FOUND.format(
@@ -302,7 +313,8 @@ def check_submission(self, monitored_sub: MonitoredSub, post: Post) -> Optional[
                 report_msg = self.response_builder.build_report_msg(monitored_sub.name, msg_values)
                 self._report_submission(monitored_sub, submission, report_msg)
             self._lock_post(monitored_sub, submission)
-            self._remove_post(monitored_sub, submission)
+            if monitored_sub.remove_repost:
+                self._remove_post(monitored_sub.removal_reason, submission)
             self._send_mod_mail(monitored_sub, search_results)
         else:
             self._mark_post_as_oc(monitored_sub, submission)
@@ -396,21 +408,20 @@ def _lock_comment(self, monitored_sub: MonitoredSub, comment: Comment) -> None:
         except Exception as e:
             log.exception('Failed to lock comment', exc_info=True)

-    def _remove_post(self, monitored_sub: MonitoredSub, submission: Submission, mod_note: str = None) -> None:
+    def _remove_post(self, removal_reason: str, submission: Submission, mod_note: str = None) -> None:
         """
-        Check if given sub wants posts removed.  Remove if enabled
-        @param monitored_sub: Monitored sub
+        Remove the given submission with the supplied removal reason
+        @param removal_reason: Removal reason to look up on the subreddit
         @param submission: Submission to remove
         """
-        if monitored_sub.remove_repost:
-            try:
-                removal_reason_id = self._get_removal_reason_id(monitored_sub.removal_reason, submission.subreddit)
-                log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id)
-                submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note)
-            except Forbidden:
-                log.error('Failed to remove post https://redd.it/%s, no permission', submission.id)
-            except Exception as e:
-                log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True)
+        try:
+            removal_reason_id = self._get_removal_reason_id(removal_reason, submission.subreddit)
+            log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id)
+            submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note)
+        except Forbidden:
+            log.error('Failed to remove post https://redd.it/%s, no permission', submission.id)
+        except Exception as e:
+            log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True)

     def _get_removal_reason_id(self, removal_reason: str, subreddit: Subreddit) -> Optional[str]:
         if not removal_reason:
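
Reviewer note: moving the `remove_repost` guard out of `_remove_post` means the enable/disable checks now live at each call site, so one helper serves all three features with feature-specific reasons and `or` fallbacks. A condensed sketch of the resulting call shapes; `service` is a stand-in for the MonitoredSubService instance, and the attribute names come from this diff.

```python
if monitored_sub.remove_repost:                      # repost check path
    service._remove_post(monitored_sub.removal_reason, submission)
if monitored_sub.adult_promoter_remove_post:         # only-fans path
    service._remove_post(monitored_sub.adult_promoter_removal_reason, submission)
if monitored_sub.high_volume_reposter_remove_post:   # high-volume reposter path
    service._remove_post(monitored_sub.high_volume_reposter_removal_reason, submission)
```
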
diff --git a/redditrepostsleuth/summonssvc/requirements.txt b/redditrepostsleuth/summonssvc/requirements.txt
index c8e6534..61d7667 100644
--- a/redditrepostsleuth/summonssvc/requirements.txt
+++ b/redditrepostsleuth/summonssvc/requirements.txt
@@ -10,4 +10,4 @@ python-Levenshtein==0.21.1
 distance==0.1.3
 pydantic==1.10.9
 sentry-sdk==1.29.2
-cryptography==41.0.3
\ No newline at end of file
+cryptography==41.0.6
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 6514176..1dfe0aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,8 +10,8 @@ python-Levenshtein==0.21.1
 distance==0.1.3
 pydantic==1.10.9
 sentry-sdk==1.29.2
-aiohttp==3.8.5
+aiohttp==3.9.0
 pyjwt==2.8.0
 gunicorn==21.2.0
 falcon==3.1.1
-cryptography==41.0.3
\ No newline at end of file
+cryptography==41.0.6
\ No newline at end of file
diff --git a/tests/submonitorsvc/test_subMonitor.py b/tests/submonitorsvc/test_subMonitor.py
index 3bd369f..fe41e6b 100644
--- a/tests/submonitorsvc/test_subMonitor.py
+++ b/tests/submonitorsvc/test_subMonitor.py
@@ -121,7 +121,12 @@ def test__handle_only_fans_flagged_user_ban_user(self, mock_ban_user, mock_remov
     def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_remove_post):
         user_review = UserReview(content_links_found=1, username='test_user', notes='Profile links match onlyfans.com')
         post = Post(subreddit='test_subreddit', author='test_user')
-        monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=False)
+        monitored_sub = MonitoredSub(
+            name='test_subreddit',
+            adult_promoter_remove_post=True,
+            adult_promoter_ban_user=False,
+            adult_promoter_removal_reason='Removed'
+        )
         mock_uow = MagicMock(
             user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)),
             user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None))
         )
@@ -133,7 +138,7 @@ def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_re
         sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub)

         mock_ban_user.assert_not_called()
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)

     @patch.object(MonitoredSubService, '_remove_post')
     @patch.object(MonitoredSubService, '_ban_user')
@@ -172,12 +177,13 @@ def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban
             high_volume_reposter_ban_user=False,
             high_volume_reposter_threshold=100,
             high_volume_reposter_notify_mod_mail=False,
-            high_volume_reposter_remove_post=True
+            high_volume_reposter_remove_post=True,
+            high_volume_reposter_removal_reason='Removed'
         )
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)

         mock_ban_user.assert_not_called()
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)
         mock_response_handler.send_mod_mail.assert_not_called()

     @patch.object(MonitoredSubService, '_remove_post')
@@ -195,12 +201,13 @@ def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self,
             high_volume_reposter_ban_user=True,
             high_volume_reposter_threshold=100,
             high_volume_reposter_notify_mod_mail=False,
-            high_volume_reposter_remove_post=True
+            high_volume_reposter_remove_post=True,
+            high_volume_reposter_removal_reason='Removed'
         )
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)

         mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'High volume of reposts detected by Repost Sleuth')
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)
         mock_response_handler.send_mod_mail.assert_not_called()

     @patch.object(MonitoredSubService, '_remove_post')
diff --git a/worker-requirements.txt b/worker-requirements.txt
index 0f5f254..7548efc 100644
--- a/worker-requirements.txt
+++ b/worker-requirements.txt
@@ -11,4 +11,4 @@ distance==0.1.3
 pydantic==1.10.9
 sentry-sdk==1.29.2
 pyjwt==2.8.0
-cryptography==41.0.3
\ No newline at end of file
+cryptography==41.0.6
\ No newline at end of file
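
Reviewer note: the updated tests pin the removal-reason plumbing, but no test covers the new ban-reason fallback. A sketch of a companion case this diff doesn't add, following the structure of the tests above with the fixture and mock wiring (sub_monitor, post, mock_uow, mock_ban_user) elided as they are set up there.

```python
def test__handle_high_volume_reposter_check_custom_ban_reason(self, mock_ban_user, mock_remove_post):
    monitored_sub = MonitoredSub(
        name='test_subreddit',
        high_volume_reposter_ban_user=True,
        high_volume_reposter_threshold=100,
        high_volume_reposter_remove_post=False,
        high_volume_reposter_ban_reason='Custom ban reason'
    )
    post = Post(subreddit='test_subreddit', author='test_user')
    sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)

    # With a ban reason set, the `or` fallback should be bypassed
    mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'Custom ban reason')
    mock_remove_post.assert_not_called()
```
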