Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/onlyfans messages #359

Merged
merged 21 commits into from
Feb 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
6c8f898
Merge pull request #326 from barrycarey/dev
barrycarey Aug 31, 2023
45fb455
Merge pull request #328 from barrycarey/dev
barrycarey Aug 31, 2023
7044bbb
Merge pull request #330 from barrycarey/dev
barrycarey Sep 1, 2023
3ffc483
Merge pull request #334 from barrycarey/dev
barrycarey Sep 11, 2023
9501abf
Merge pull request #338 from barrycarey/dev
barrycarey Sep 20, 2023
be69fa9
Bump cryptography in /redditrepostsleuth/summonssvc
dependabot[bot] Nov 28, 2023
d20844a
Bump cryptography from 41.0.3 to 41.0.6
dependabot[bot] Nov 28, 2023
27956b5
Bump cryptography from 41.0.3 to 41.0.6 in /redditrepostsleuth/ingestsvc
dependabot[bot] Nov 28, 2023
f54b35b
Bump cryptography in /redditrepostsleuth/repostsleuthsiteapi
dependabot[bot] Nov 29, 2023
cd1aab2
Merge pull request #352 from barrycarey/dev
barrycarey Dec 14, 2023
5267cc0
Merge pull request #351 from barrycarey/dependabot/pip/redditrepostsl…
barrycarey Dec 14, 2023
03683f8
Merge pull request #350 from barrycarey/dependabot/pip/redditrepostsl…
barrycarey Dec 14, 2023
b71023e
Merge pull request #349 from barrycarey/dependabot/pip/redditrepostsl…
barrycarey Dec 14, 2023
ea2dd6e
Merge pull request #348 from barrycarey/dependabot/pip/cryptography-4…
barrycarey Dec 14, 2023
a7056d9
Bump aiohttp from 3.8.5 to 3.9.0 in /redditrepostsleuth/ingestsvc
dependabot[bot] Dec 14, 2023
6e6e104
Bump aiohttp from 3.8.5 to 3.9.0
dependabot[bot] Dec 14, 2023
27fad3e
Merge pull request #347 from barrycarey/dependabot/pip/redditrepostsl…
barrycarey Dec 14, 2023
d6a4a42
Merge pull request #346 from barrycarey/dependabot/pip/aiohttp-3.9.0
barrycarey Dec 14, 2023
5b9efd3
Ingest and scheduled task fixes unrelated to branch name.
barrycarey Jan 22, 2024
2a10054
most support for ban and remove OF messages
barrycarey Feb 10, 2024
a74ce36
Working implementation of removal reasons
barrycarey Feb 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ services:
environment:
- RUN_ENV=production
- db_user=ingest
- LOG_LEVEL=ERROR
- LOG_LEVEL=INFO
- CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.ingest_tasks
entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=3,16
entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=16,1

link_repost_worker:
container_name: link-repost-worker
Expand Down
5 changes: 5 additions & 0 deletions docs/dev_docs/modifying_monitored_sub.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

### Adding or Removing Config Values
* Add or remove the config values in core/db/databasemodels.py
* Add or remove the matching default values in core/util/default_bot_config.py
* Update sub_monitor_exposed_config_options in the config JSON
59 changes: 33 additions & 26 deletions redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,35 +57,42 @@ def update_proxies(uowm: UnitOfWorkManager) -> None:
)
uow.commit()

def update_top_reposts(uowm: UnitOfWorkManager):
def update_top_reposts(uow: UnitOfWork, post_type_id: int, day_range: int = None):
# reddit.info(reddit_ids_to_lookup):
post_types = [2, 3]
day_ranges = [1, 7, 14, 30, 365, None]
log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range)
range_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE detected_at > NOW() - INTERVAL :days DAY AND post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
all_time_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
with uowm.start() as uow:
for post_type in post_types:
for days in day_ranges:
log.info('Getting top reposts for post type %s with range %s', post_type, days)
if days:
query = range_query
else:
query = all_time_query
uow.session.execute(
text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
{'posttype': post_type, 'days': days})
uow.commit()
result = uow.session.execute(text(query), {'posttype': post_type, 'days': days})
for row in result:
stat = StatsTopRepost()
stat.post_id = row[0]
stat.post_type_id = post_type
stat.day_range = days
stat.repost_count = row[1]
stat.updated_at = func.utc_timestamp()
stat.nsfw = False
uow.stat_top_repost.add(stat)
uow.commit()
if day_range:
query = range_query
uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
{'posttype': post_type_id, 'days': day_range})
else:
query = all_time_query
uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range IS NULL'),
{'posttype': post_type_id})

uow.commit()



result = uow.session.execute(text(query), {'posttype': post_type_id, 'days': day_range})
for row in result:
stat = StatsTopRepost()
stat.post_id = row[0]
stat.post_type_id = post_type_id
stat.day_range = day_range
stat.repost_count = row[1]
stat.updated_at = func.utc_timestamp()
stat.nsfw = False
uow.stat_top_repost.add(stat)
uow.commit()

def run_update_top_reposts(uow: UnitOfWork) -> None:
    """Run ``update_top_reposts`` for every post type / day range pair.

    Post type IDs 1, 2 and 3 are each combined with the day ranges
    1, 7, 14 and 30, plus ``None`` (the value ``update_top_reposts``
    defaults to when no range is given).  Post types form the outer
    iteration and day ranges the inner one.

    :param uow: Unit of work handed unchanged to every call.
    """
    tracked_post_types = (1, 2, 3)
    windows = (1, 7, 14, 30, None)
    # Expand the pairs up front; ordering matches a nested loop with
    # post types on the outside.
    combos = [(type_id, window) for type_id in tracked_post_types for window in windows]
    for type_id, window in combos:
        update_top_reposts(uow, type_id, window)

def update_top_reposters(uow: UnitOfWork, post_type_id: int, day_range: int = None) -> None:
log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range)
Expand Down
19 changes: 10 additions & 9 deletions redditrepostsleuth/core/celery/tasks/ingest_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


@celery.task(bind=True, base=SqlAlchemyTask, ignore_reseults=True, serializer='pickle', autoretry_for=(ConnectionError,ImageConversionException,GalleryNotProcessed), retry_kwargs={'max_retries': 10, 'countdown': 300})
def save_new_post(self, submission: dict):
def save_new_post(self, submission: dict, repost_check: bool = True):

# TODO: temp fix until I can fix imgur gifs
if 'imgur' in submission['url'] and 'gifv' in submission['url']:
Expand Down Expand Up @@ -48,21 +48,22 @@ def save_new_post(self, submission: dict):
log.exception('Database save failed: %s', str(e), exc_info=False)
return

if post.post_type_id == 1:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
elif post.post_type_id == 2:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
elif post.post_type_id == 3:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])
if repost_check:
if post.post_type_id == 1:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
elif post.post_type_id == 2:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
elif post.post_type_id == 3:
celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])

celery.send_task('redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans', args=[post.author])



@celery.task
def save_new_posts(posts: list[dict]) -> None:
def save_new_posts(posts: list[dict], repost_check: bool = True) -> None:
for post in posts:
save_new_post.apply_async((post,))
save_new_post.apply_async((post, repost_check))

@celery.task(bind=True, base=SqlAlchemyTask, ignore_results=True)
def save_pushshift_results(self, data):
Expand Down
9 changes: 8 additions & 1 deletion redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from redditrepostsleuth.core.celery import celery
from redditrepostsleuth.core.celery.basetasks import RedditTask, SqlAlchemyTask, AdminTask
from redditrepostsleuth.core.celery.task_logic.scheduled_task_logic import update_proxies, update_top_reposts, \
token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data
token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data, run_update_top_reposts
from redditrepostsleuth.core.db.databasemodels import MonitoredSub, StatsDailyCount
from redditrepostsleuth.core.logging import configure_logger
from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions
Expand Down Expand Up @@ -178,6 +178,13 @@ def update_daily_stats(self):
log.exception('Problem updating stats')


@celery.task(bind=True, base=SqlAlchemyTask)
def update_all_top_reposts_task(self):
    """Celery task that runs ``run_update_top_reposts``.

    Starts a unit of work from the task's ``uowm`` and passes it to
    ``run_update_top_reposts``.  Any exception raised along the way is
    logged with its traceback and swallowed, so nothing propagates out of
    the task body.
    """
    try:
        with self.uowm.start() as uow:
            run_update_top_reposts(uow)
    except Exception:
        # Task boundary: log.exception records the active traceback itself,
        # so the exception does not need to be bound to a name.
        log.exception('Unknown task error')

@celery.task(bind=True, base=SqlAlchemyTask)
def update_all_top_reposters_task(self):
Expand Down
2 changes: 1 addition & 1 deletion redditrepostsleuth/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _initialize_attributes(self):
'default_text_crosspost_filter',
'default_text_max_days_old_filter',
'default_text_target_distance',
'discord_logging_hook'
'discord_logging_hook',

]

Expand Down
14 changes: 12 additions & 2 deletions redditrepostsleuth/core/db/databasemodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def __repr__(self) -> str:
reports = relationship('UserReport', back_populates='post')
hashes = relationship('PostHash', back_populates='post')
post_type = relationship('PostType') # lazy has to be set to JSON encoders don't fail for unbound session
#post_type = relationship('PostType', lazy='joined')

def to_dict(self):
return {
Expand Down Expand Up @@ -198,7 +199,7 @@ class RepostSearch(Base):
Index('idx_post_type_searched_at', 'post_type_id', 'searched_at'),
Index('idx_by_subreddit_and_type', 'subreddit', 'source', 'post_type_id', 'matches_found'),
Index('idx_source', 'source'),
Index('idx_matches_found', 'matches_found')
Index('idx_matches_found', 'searched_at', 'source', 'matches_found')
)
id = Column(Integer, primary_key=True)
post_id = Column(Integer, ForeignKey('post.id'))
Expand Down Expand Up @@ -351,10 +352,14 @@ class MonitoredSub(Base):
adult_promoter_remove_post = Column(Boolean, default=False)
adult_promoter_ban_user = Column(Boolean, default=False)
adult_promoter_notify_mod_mail = Column(Boolean, default=False)
adult_promoter_removal_reason = Column(String(300))
adult_promoter_ban_reason = Column(String(300))
high_volume_reposter_ban_user = Column(Boolean, default=False)
high_volume_reposter_remove_post = Column(Boolean, default=False)
high_volume_reposter_threshold = Column(Integer, default=100)
high_volume_reposter_notify_mod_mail = Column(Boolean, default=False)
high_volume_reposter_removal_reason = Column(String(300))
high_volume_reposter_ban_reason = Column(String(300))

post_checks = relationship("MonitoredSubChecks", back_populates='monitored_sub', cascade='all, delete', )
config_revisions = relationship("MonitoredSubConfigRevision", back_populates='monitored_sub', cascade='all, delete')
Expand Down Expand Up @@ -422,7 +427,12 @@ def to_dict(self):
'high_volume_reposter_ban_user': self.high_volume_reposter_ban_user,
'high_volume_reposter_remove_post': self.high_volume_reposter_remove_post,
'high_volume_reposter_threshold': self.high_volume_reposter_threshold,
'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail
'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail,
'high_volume_reposter_removal_reason': self.high_volume_reposter_removal_reason,
'high_volume_reposter_ban_reason': self.high_volume_reposter_ban_reason,
'adult_promoter_removal_reason': self.adult_promoter_removal_reason,
'adult_promoter_ban_reason': self.adult_promoter_ban_reason


}

Expand Down
1 change: 1 addition & 0 deletions redditrepostsleuth/core/model/misc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class JobStatus(Enum):
TIMEOUT = auto()
PROXYERROR = auto()
ERROR = auto()
RATELIMIT = auto()

@dataclass
class BatchedPostRequestJob:
Expand Down
2 changes: 1 addition & 1 deletion redditrepostsleuth/core/services/responsebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def build_sub_comment(

try:
return self.build_default_comment(search_results, message, **kwargs)
except KeyError:
except KeyError as e:
log.warning('Custom repost template for %s has a bad slug: %s', monitored_sub.name, monitored_sub.repost_response_template)
return self.build_default_comment(search_results, **kwargs)

Expand Down
4 changes: 4 additions & 0 deletions redditrepostsleuth/core/util/default_bot_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@
"adult_promoter_remove_post": False,
"adult_promoter_ban_user": False,
"adult_promoter_notify_mod_mail": False,
"adult_promoter_ban_reason": None,
"adult_promoter_removal_reason": None,
"high_volume_reposter_ban_user": False,
"high_volume_reposter_remove_post": False,
"high_volume_reposter_threshold": 150,
"high_volume_reposter_notify_mod_mail": False,
"high_volume_reposter_removal_reason": None,
"high_volume_reposter_ban_reason": None

}
Loading
Loading