Merge pull request #157 from twitter/jbaxter/2023_10_10_scorer_update

Improve optimization stability and note status history timestamps
twitter · Oct 10, 2023 · 0c32e5c · 0c32e5c
2 parents c6aa718 + 9f2af9b
commit 0c32e5c
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 5 deletions.
diff --git a/sourcecode/scoring/constants.py b/sourcecode/scoring/constants.py
@@ -13,6 +13,7 @@
 #
 # https://docs.python.org/3/tutorial/modules.html#more-on-modules
 epochMillis = 1000 * time.time()
+useCurrentTimeInsteadOfEpochMillisForNoteStatusHistory = True
 
 maxTrainError = 0.09
 
@@ -150,6 +151,8 @@ def rater_factor_key(i):
 ratingCountKey = "ratingCount"
 numRatingsKey = "numRatings"
 numRatingsLast28DaysKey = "numRatingsLast28"
+ratingFromInitialModelingGroupKey = "ratingFromInitialModelingGroup"
+percentFromInitialModelingGroupKey = "percentFromInitialModelingGroup"
 
 # Helpfulness Score Keys
 crhRatioKey = "CRHRatio"
@@ -526,12 +529,12 @@ def rater_factor_key(i):
   (notesCurrentlyRatedHelpful, pd.Int64Dtype()),
   (notesCurrentlyRatedNotHelpful, pd.Int64Dtype()),
   (notesAwaitingMoreRatings, pd.Int64Dtype()),
-  (enrollmentState, np.int32),
+  (enrollmentState, pd.Int64Dtype()),
   (successfulRatingNeededToEarnIn, pd.Int64Dtype()),
   (authorTopNotHelpfulTagValues, str),
   (timestampOfLastStateChange, np.double),
   (aboveHelpfulnessThresholdKey, np.float64),  # nullable bool
-  (isEmergingWriterKey, np.bool_),
+  (isEmergingWriterKey, pd.BooleanDtype()),
   (aggregateRatingReceivedTotal, pd.Int64Dtype()),
   (timestampOfLastEarnOut, np.double),
   (groupRaterInterceptKey, np.double),

diff --git a/sourcecode/scoring/mf_base_scorer.py b/sourcecode/scoring/mf_base_scorer.py
@@ -176,6 +176,8 @@ def _run_stable_matrix_factorization(
     self,
     ratingsForTraining: pd.DataFrame,
     userEnrollmentRaw: pd.DataFrame,
+    minPercentRatingsFromModelingGroup: float = 0.75,
+    minNumRatingsToIncludeInStableInitialization: int = 5,
   ):
     """Train a matrix factorization model on the ratingsForTraining data.
     Due to stability issues when trained on the entire dataset with no initialization, this is done in
@@ -202,10 +204,45 @@ def _run_stable_matrix_factorization(
       left_on=c.raterParticipantIdKey,
       right_on=c.participantIdKey,
     )
-    ratingsForStableInitialization = ratingsForTrainingWithModelingGroup[
+
+    ratingsForTrainingWithModelingGroup[c.ratingFromInitialModelingGroupKey] = (
       ratingsForTrainingWithModelingGroup[c.modelingGroupKey]
       == self._modelingGroupToInitializeForStability
+    )
+
+    # Only include ratings from the modeling group
+    ratingsForStableInitialization = ratingsForTrainingWithModelingGroup[
+      ratingsForTrainingWithModelingGroup[c.ratingFromInitialModelingGroupKey]
     ]
+
+    # Only include notes mostly rated by the modeling group (defaults: at least 75% of ratings, and at least 5 ratings total)
+    ratingsForTrainingWithModelingGroup[c.ratingCountKey] = 1
+    noteStatsByRatedModelingGroup = (
+      ratingsForTrainingWithModelingGroup.groupby(c.noteIdKey)
+      .sum()[[c.ratingFromInitialModelingGroupKey, c.ratingCountKey]]
+      .reset_index()
+    )
+    noteStatsByRatedModelingGroup[c.percentFromInitialModelingGroupKey] = (
+      noteStatsByRatedModelingGroup[c.ratingFromInitialModelingGroupKey]
+      / noteStatsByRatedModelingGroup[c.ratingCountKey]
+    )
+    noteStatsByRatedModelingGroup[
+      c.percentFromInitialModelingGroupKey
+    ] = noteStatsByRatedModelingGroup[c.percentFromInitialModelingGroupKey].fillna(0)
+    notesRatedMostlyByInitialModelingGroup = noteStatsByRatedModelingGroup[
+      (
+        noteStatsByRatedModelingGroup[c.percentFromInitialModelingGroupKey]
+        >= minPercentRatingsFromModelingGroup
+      )
+      & (
+        noteStatsByRatedModelingGroup[c.ratingCountKey]
+        >= minNumRatingsToIncludeInStableInitialization
+      )
+    ]
+    ratingsForStableInitialization = ratingsForStableInitialization.merge(
+      notesRatedMostlyByInitialModelingGroup[[c.noteIdKey]], on=c.noteIdKey
+    )
+
     assert (
       len(ratingsForStableInitialization) > 0
     ), "No ratings from stable initialization modeling group."

diff --git a/sourcecode/scoring/note_ratings.py b/sourcecode/scoring/note_ratings.py
@@ -190,7 +190,7 @@ def get_ratings_with_scores(
   )
 
   ratingsWithScores = ratingsBeforeNoteStatus[
-    [c.raterParticipantIdKey, c.helpfulNumKey, c.noteIdKey]
+    [c.raterParticipantIdKey, c.helpfulNumKey, c.noteIdKey, c.createdAtMillisKey]
   ].merge(
     scoredNotes[
       [

diff --git a/sourcecode/scoring/note_status_history.py b/sourcecode/scoring/note_status_history.py
@@ -1,3 +1,5 @@
+import time
+
 from . import constants as c
 from .scoring_rules import RuleID
 
@@ -166,7 +168,13 @@ def update_note_status_history(
   Returns:
       pd.DataFrame: noteStatusHistory
   """
-  currentTimeMillis = c.epochMillis
+  if c.useCurrentTimeInsteadOfEpochMillisForNoteStatusHistory:
+    # When running in prod, we use the latest time possible, so as to include as many valid ratings
+    # as possible, and be closest to the time the new note statuses are user-visible.
+    currentTimeMillis = 1000 * time.time()
+  else:
+    # When running in test, we use the overridable epochMillis constant.
+    currentTimeMillis = c.epochMillis
   newScoredNotesSuffix = "_sn"
   mergedStatuses = oldNoteStatusHistory.merge(
     scoredNotes[

diff --git a/sourcecode/scoring/runner.py b/sourcecode/scoring/runner.py
@@ -86,6 +86,7 @@ def main():
   args = parse_args()
   if args.epoch_millis:
     c.epochMillis = args.epoch_millis
+    c.useCurrentTimeInsteadOfEpochMillisForNoteStatusHistory = False
 
   # Load input dataframes.
   dataLoader = LocalDataLoader(args.notes, args.ratings, args.status, args.enrollment, args.headers)