Skip to content

Commit

Permalink
WIP Mehestan debug
Browse files Browse the repository at this point in the history
  • Loading branch information
lenhoanglnh committed Mar 3, 2025
1 parent 6c32e7a commit a4db59a
Show file tree
Hide file tree
Showing 18 changed files with 1,922 additions and 1,934 deletions.
4 changes: 2 additions & 2 deletions solidago/src/solidago/modules/aggregation/average.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ def __call__(self,
""" Returns weighted average of user's scores """
global_model = DirectScoring()
multiscores = user_models(entities)
voting_rights = voting_rights.groupby(["entity_name", "criterion"])
voting_rights = voting_rights.to_dict(["entity_name", "criterion"])

for (entity_name, criterion), scores in multiscores.groupby(["entity_name", "criterion"]):
for (entity_name, criterion), scores in multiscores.to_dict(["entity_name", "criterion"]):
weighted_sum = sum([
score * voting_rights[entity_name, criterion].get(username)
for username, score in scores
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def __call__(self,
) -> ScoringModel:

global_model = DirectScoring()
voting_rights = voting_rights.groupby(["username", "entity_name", "criterion"])
voting_rights = voting_rights.to_dict(["username", "entity_name", "criterion"])
multiscores = user_models(entities)
common_kwargs = dict(lipschitz=self.lipschitz, error=self.error)

for (entity_name, criterion), scores in multiscores.groupby(["entity_name", "criterion"]):
for (entity_name, criterion), scores in multiscores.to_dict(["entity_name", "criterion"]):
rights = np.array([
voting_rights[username, entity_name, criterion]
for username, _ in scores
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ def user_learn(self,
model = DirectScoring()
compared_entity_names = set(comparisons["left_name"]) | set(comparisons["right_name"])
entities = entities.get(compared_entity_names) # Restrict to compared entities
init = init_model(entities).groupby(["criterion"])
init = init_model(entities).to_dict("criterion")
criteria = set(comparisons["criterion"]) | set(init["criterion"])
for criterion, cmps in comparisons.groupby(["criterion"]):
for criterion, cmps in comparisons.to_dict("criterion"):
criterion_entity_names = set(cmps["left_name"]) | set(cmps["right_name"])
if len(criterion_entity_names) <= 1:
continue
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Mapping

import pandas as pd
import numpy as np

from solidago.primitives import qr_quantile
Expand Down Expand Up @@ -36,19 +35,19 @@ def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
"""
scores = user_models.score(entities) # key_names == ["username", "criterion", "entity_name"]
scales = MultiScore(key_names=["depth", "kind", "criterion"])
for criterion, user_scores in scores.groupby(["criterion"]):
for criterion, user_scores in scores.to_dict(["criterion"]):
weights = 1 / user_scores.groupby("username").transform("size")
translation_value = - qr_quantile(
lipschitz=self.lipschitz,
quantile=self.quantile,
values=np.array(user_scores["score"], dtype=np.float64),
values=np.array(user_scores["value"], dtype=np.float64),
voting_rights=np.array(weights, dtype=np.float64),
left_uncertainties=np.array(user_scores["left_unc"], dtype=np.float64),
right_uncertainties=np.array(user_scores["right_unc"], dtype=np.float64),
error=self.error,
) + self.target_score
scales.set(0, "translations", criterion, Score(translation_value, 0, 0))
return user_models.scale(scales, note="quantile_shift")
scales.set(0, "translations", criterion, translation_value, 0, 0)
return user_models.scale(scales, note="lipschitz_quantile_shift")


class LipschitzQuantileZeroShift(LipschitzQuantileShift):
Expand Down
15 changes: 4 additions & 11 deletions solidago/src/solidago/modules/scaling/lipschitz_standardize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import pandas as pd

from solidago.primitives import qr_standard_deviation
from solidago.state import *
from solidago.modules.base import StateFunction
Expand All @@ -20,22 +18,17 @@ def __init__(self, dev_quantile: float=0.9, lipschitz: float=0.1, error: float=1
def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
scores = user_models.score(entities) # key_names == ["username", "criterion", "entity_name"]
scales = MultiScore(key_names=["depth", "kind", "criterion"])
for criterion, user_scores in scores.groupby(["criterion"]):
for criterion, user_scores in scores.to_dict(["criterion"]):
weights = 1 / user_scores.groupby("username").transform("size")
std_dev = qr_standard_deviation(
lipschitz=self.lipschitz,
values=user_scores["score"].to_numpy(),
values=user_scores["value"].to_numpy(),
quantile_dev=self.dev_quantile,
voting_rights=weights.to_numpy(),
left_uncertainties=user_scores["left_unc"].to_numpy(),
right_uncertainties=user_scores["right_unc"].to_numpy(),
default_dev=1.0,
error=self.error,
)
scales.set(0, "multipliers", criterion, Score(translation_value, 0, 0))
multipliers.set(criterion, 1 / std_dev)

return UserModels({
username: ScaledModel(model, multipliers=multipliers, note="standardize")
for username, model in user_models
})
scales.set(0, "multipliers", criterion, std_dev, 0, 0)
return user_models.scale(scales, note="lipschitz_standardardize")
4 changes: 2 additions & 2 deletions solidago/src/solidago/modules/scaling/mehestan.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,8 @@ def compute_ratios(self,
pairs = pairs.n_samples(self.n_diffs_sample_max)
weight_list, ratio_list = list(), list()
penalty = lambda entity_name: scaler_public.penalty(self.privacy_penalty, entity_name)
scalee_scores = scalee_scores.groupby(["entity_name"])
scaler_scores = scaler_scores.groupby(["entity_name"])
scalee_scores = scalee_scores.to_dict("entity_name")
scaler_scores = scaler_scores.to_dict("entity_name")
for e, f in pairs:
ratio = (scaler_scores[e] - scaler_scores[f]) / (scalee_scores[e] - scalee_scores[f])
if ratio.isnan(): continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def __call__(self,
voting_rights = VotingRights()
criteria = set(assessments["criterion"]) | set(comparisons["criterion"])
comparisons = comparisons.order_by_entities()
assessments = assessments.groupby(["criterion"])
comparisons = comparisons.groupby(["criterion"])
assessments = assessments.to_dict("criterion")
comparisons = comparisons.to_dict("criterion")
stat_names = ("cumulative_trust", "min_voting_right", "overtrust")
entity_names = {
c: set(assessments[c]["entity_name"]) | set(comparisons[c]["entity_name"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,15 @@ def to_list(l):
self.meta._group_cache = dict()

@property
def key_names(self):
def key_names(self) -> list[str]:
return self.meta.key_names

@key_names.setter
def key_names(self, key_names: list[str]) -> None:
self.meta.key_names = key_names

@property
def value_names(self):
def value_names(self) -> list[str]:
return self.meta.value_names

""" The following methods could be worth redefining in derived classes """
Expand Down Expand Up @@ -115,7 +119,7 @@ def get(self,
) -> Union["UnnamedDataFrame", tuple]:
kwargs = self.input2dict(*args, keys_only=True, **kwargs)
if cache_groups:
return self.groupby(list(kwargs.keys()), process=process, last_only=last_only).get(
return self.to_dict(list(kwargs.keys()), process=process, last_only=last_only).get(
process=process,
last_only=last_only,
**kwargs
Expand Down Expand Up @@ -204,11 +208,15 @@ def last_only(self) -> "UnnamedDataFrame":
last_only=True
)

def groupby(self,
columns: Optional[list[str]]=None,
def groupby(self, *args, **kwargs) -> "DataFrameGroupBy":
return DataFrame(self).groupby(*args, **kwargs)

def to_dict(self,
columns: Optional[Union[str, list[str]]]=None,
process: bool=True,
last_only: Optional[bool]=None,
) -> "UnnamedDataFrameDict":
columns = [columns] if isinstance(columns, str) else columns
columns = columns if columns else self.key_names
if (tuple(columns), process, last_only) in self.meta._group_cache:
return self.meta._group_cache[tuple(columns), process, last_only]
Expand All @@ -229,6 +237,7 @@ def iter(self,
last_only: Optional[bool]=None
) -> Iterable:
last_only = self.meta._last_only if last_only is None else last_only
columns = [columns] if isinstance(columns, str) else columns
columns = self.key_names if columns is None else columns
if not columns:
yield list(), self.df2value(self, last_only) if process else self
Expand Down
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/comparisons/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def to_comparison_dict(self,
result = list()
last_only = self.meta._last_only if last_only is None else last_only
comparisons = self.last_only() if last_only else self
entity_ordered_comparisons = comparisons.order_by_entities().groupby(["entity_name"])
entity_ordered_comparisons = comparisons.order_by_entities().to_dict(["entity_name"])
entity_name2index = { str(entity): index for index, entity in enumerate(entities) }
for i, entity in enumerate(entities):
comparisons = entity_ordered_comparisons[str(entity)]
Expand Down
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __call__(self,
key_names = list()
if isinstance(entities, Entities):
key_names.append("entity_name")
if criterion is not None:
if criterion is None:
key_names.append("criterion")
criteria = self.criteria() if criterion is None else { criterion }
entities = self.evaluated_entities(entities) if isinstance(entities, Entities) else [entities]
Expand Down
7 changes: 4 additions & 3 deletions solidago/src/solidago/state/models/user_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,20 @@ def __call__(self,
entities: Union[str, "Entity", "Entities"],
criterion: Optional[str]=None,
) -> MultiScore:
return self.score(entity, criterion)
return self.score(entities, criterion)

def score(self,
entities: Union[str, "Entity", "Entities"],
criterion: Optional[str]=None,
) -> MultiScore:
key_names = ["username"]
from solidago.state.entities import Entities
if isinstance(entities, Entities):
key_names.append("entity_name")
if criterion is not None:
if criterion is None:
key_names.append("criterion")
criteria = self.criteria() if criterion is None else { criterion }
entities = self.evaluated_entities(entities) if isinstance(entities, Entities) else [entities]
entities = entities if isinstance(entities, Entities) else [entities]
scores = [
(str(user), str(entity), c, model.score(entity, c))
for user, model in self
Expand Down
Loading

0 comments on commit a4db59a

Please sign in to comment.