Skip to content

Commit

Permalink
WIP Debugging Mehestan
Browse files Browse the repository at this point in the history
  • Loading branch information
lenhoanglnh committed Mar 4, 2025
1 parent dba352c commit 3aef433
Show file tree
Hide file tree
Showing 12 changed files with 1,982 additions and 1,925 deletions.
75 changes: 62 additions & 13 deletions solidago/experiments/toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,68 @@

# t = pipeline(s)

# users, entities, made_public, user_models = s.users, s.entities, s.made_public, s.user_models
# criterion = next(iter(user_models.criteria()))
# scores = user_models(entities, criterion)
# trusts = dict(zip(users.index, users["trust_score"]))

# self = pipeline.scaling.collaborative_scaling
# activities, is_scaler = self.compute_activities_and_scalers(users, trusts, made_public, scores)
# users[f"activities_{criterion}"] = activities
# users[f"is_scaler_{criterion}"] = is_scaler
# scalers = users.get({ f"is_scaler_{criterion}": True })

# scaler_scales, scaler_scores = self.scale_to_scalers(trusts, made_public, scores[scalers], scores[scalers], scalees_are_scalers=True)
# scalee_model_norms = self.compute_model_norms(made_public, scalee_scores)

# --- WIP debugging script ---------------------------------------------------
# Manually unrolls the Mehestan collaborative-scaling pipeline, one step at
# a time, so each intermediate value can be inspected interactively instead
# of calling the pipeline end to end.
# NOTE(review): `s` and `pipeline` are defined earlier in this file (not
# visible in this chunk); `VotingRights` and `MultiScore` are presumably
# imported at the top of the file — confirm before running standalone.

# Unpack the experiment state and pick an arbitrary criterion to debug.
users, entities, made_public, user_models = s.users, s.entities, s.made_public, s.user_models
criterion = next(iter(user_models.criteria()))
scores = user_models(entities, criterion)
# Map each username (users.index) to its trust score.
trusts = dict(zip(users.index, users["trust_score"]))

# `self` deliberately aliases the scaling module so that lines copied out of
# its methods can be pasted here unchanged while debugging.
self = pipeline.scaling.collaborative_scaling
activities, is_scaler = self.compute_activities_and_scalers(users, trusts, made_public, scores)
users[f"activities_{criterion}"] = activities
users[f"is_scaler_{criterion}"] = is_scaler
scalers = users.get({ f"is_scaler_{criterion}": True })

# scaler_scales, scaler_scores = self.scale_to_scalers(trusts, made_public,
# scores.get_all(scalers), scores.get_all(scalers), scalees_are_scalers=True)
# First phase scales the scalers against themselves, hence scaler and scalee
# scores are both taken from the same scaler subset (scalees_are_scalers).
scaler_scores = scores.get_all(scalers)
scalee_scores = scores.get_all(scalers)
scalees_are_scalers = True
scalee_model_norms = self.compute_model_norms(made_public, scalee_scores)

# weight_lists, ratio_lists = self.ratios(made_public, scaler_scores, scalee_scores)
# Manual unrolling of self.ratios: per (scalee, scaler) pair, accumulate the
# comparison weights and values used for scaling.
key_names = ["scalee_name", "scaler_name"]
weight_lists = VotingRights(key_names=key_names, last_only=False)
comparison_lists = MultiScore(key_names=key_names, last_only=False)
# Inspect a single arbitrary scalee...
scalee_name = next(iter(set(scalee_scores["username"])))
kwargs = scalee_scores.input2dict(username=scalee_name, keys_only=True)
last_only = scalee_scores.meta._last_only
scalee_name_scores = scalee_scores.get(username=scalee_name, cache_groups=True)
scalee_entity_names = set(scalee_name_scores["entity_name"])
# ...and every scaler that scored at least one entity this scalee scored.
scaler_names = set.union(*[
set(scaler_scores.get(entity_name=entity_name, cache_groups=True)["username"])
for entity_name in scalee_entity_names
])
scaler_name = next(iter(scaler_names))

scaler_name_scores = scaler_scores.get(username=scaler_name, cache_groups=True)
scaler_entity_names = set(scaler_name_scores["entity_name"])
# Entities scored by both users; comparisons are computed on this overlap.
common_entity_names = scalee_entity_names & scaler_entity_names
scaler_public = made_public.get(username=scaler_name, cache_groups=True)

# Remaining pipeline steps, kept commented out until the steps above are
# confirmed to behave as expected:
# voting_rights, ratios = self.aggregate_scaler_scores(trusts, weight_lists, ratio_lists)
# multipliers = self.compute_multipliers(voting_rights, ratios, scalee_model_norms)

# for (scalee_name, entity_name), score in scalee_scores:
# scalee_scores.set(scalee_name, entity_name, score * multipliers.get(username=scalee_name))
# if scalees_are_scalers:
# scaler_scores = scalee_scores

# weight_lists, diff_lists = self.diffs(made_public, scaler_scores, scalee_scores)
# voting_rights, diffs = self.aggregate_scaler_scores(trusts, weight_lists, diff_lists)
# translations = self.compute_translations(voting_rights, diffs)

# for (scalee_name, entity_name), score in scalee_scores:
# scalee_scores.set(scalee_name, entity_name, score + translations.get(username=scalee_name))

# multipliers["kind"] = "multiplier"
# translations["kind"] = "translation"
# scalee_scales = MultiScore(multipliers | translations, key_names=["username", "kind"])



# nonscalers = users.get({ f"is_scaler_{criterion}": False })
# nonscaler_scores = scores.get_all(nonscalers)
# nonscaler_scales, _ = self.scale_to_scalers(trusts, made_public, scaler_scores, nonscaler_scores)

# for seed in range(5):
#     directory = f"tests/modules/saved/{seed}"
Expand Down
2 changes: 1 addition & 1 deletion solidago/src/solidago/generators/engagement/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __call__(self, users: Users, entities: Entities) -> tuple[MadePublic, Assess
eval_entities = self.sample_evaluated_entities(user, entities)
for index, entity in enumerate(eval_entities):
public = self.public(user, entity, eval_entities)
made_public.set(public, user, entity)
made_public.set(user, entity, public)
assess = self.assess(user, entity, eval_entities)
if assess:
assessments.add_row(user, "default", entity)
Expand Down
21 changes: 13 additions & 8 deletions solidago/src/solidago/modules/scaling/mehestan.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,15 +322,15 @@ def scaler_scalee_comparison_lists(self,
scalee_name_scores = scalee_scores.get(username=scalee_name, cache_groups=True)
scalee_entity_names = set(scalee_name_scores["entity_name"])
scaler_names = set.union(*[
scaler_scores.get(entity_name=entity_name, cache_groups=True)["username"]
set(scaler_scores.get(entity_name=entity_name, cache_groups=True)["username"])
for entity_name in scalee_entity_names
])

for scaler_name in scaler_names:

if scalee_name == scaler_name:
weight_lists[scalee_name, scaler_name] = [1]
comparison_lists[scalee_name, scaler_name] = [default_value]
weight_lists.set(scalee_name, scaler_name, 1)
comparison_lists.set(scalee_name, scaler_name, default_value)
continue

scaler_name_scores = scaler_scores.get(username=scaler_name, cache_groups=True)
Expand All @@ -347,7 +347,7 @@ def scaler_scalee_comparison_lists(self,

for weight, scaler_scalee_comparison in zip(*output):
weight_lists.add_row(scalee_name, scaler_name, weight)
comparison_lists.add_row(scalee_name, scaler_name, ratio)
comparison_lists.add_row(scalee_name, scaler_name, scaler_scalee_comparison)

return weight_lists, comparison_lists

Expand Down Expand Up @@ -384,8 +384,8 @@ def aggregate_scaler_scores(self,
voting_rights=np.array(
weight_lists.get(scalee_name, scaler_name, cache_groups=True)["voting_right"]
, dtype=np.float64),
left_uncertainties=np.array(scores_df["left_unc"], dtype=np.float64),
right_uncertainties=np.array(scores_df["right_unc"], dtype=np.float64),
left_uncertainties=np.array(score_df["left_unc"], dtype=np.float64),
right_uncertainties=np.array(score_df["right_unc"], dtype=np.float64),
)
value = qr_median(**kwargs)
uncertainty = qr_uncertainty(median=value, **kwargs)
Expand Down Expand Up @@ -574,9 +574,14 @@ def compute_multipliers(self,
multipliers[user][1] is the uncertainty on the multiplier
"""
kwargs = dict(default_value=1.0, default_dev=self.default_multiplier_dev)
l = lambda name: self.lipschitz / (8 * (1e-9 + model_norms[scalee_name]))
l = lambda scalee_name: self.lipschitz / (8 * (1e-9 + model_norms[scalee_name]))
return MultiScore([
(name, *self.aggregate_scalers(weights, ratios[name], l(name), **kwargs).to_triplet())
(name, *self.aggregate_scalers(
weights,
ratios.get(scalee_name=name),
l(name),
**kwargs
).to_triplet())
for name, weights in voting_rights.iter(["scalee_name"])
], key_names=["scalee_name"])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,15 @@ def input2dict(self, *args, keys_only: bool=False, **kwargs) -> dict:
args = args[:len(self.key_names)]
assert len(args) <= len(key_value_columns) + 1
assert all({ key not in key_value_columns[:len(args)] for key in kwargs })
f = lambda v, k: str(v) if k in self.key_names else v
def f(v, k):
if k in self.key_names and isinstance(v, DataFrame):
return set(v.index)
elif k in self.key_names and isinstance(v, (set, list, tuple)):
return v
elif k in self.key_names:
return str(v)
else:
return v
kwargs = { k: f(v, k) for k, v in kwargs.items() if (not keys_only or k in self.key_names) }
if not self.value_names and len(args) > len(self.key_names):
assert len(args) == len(self.key_names) + 1
Expand All @@ -118,12 +126,11 @@ def get(self,
**kwargs
) -> Union["UnnamedDataFrame", tuple]:
kwargs = self.input2dict(*args, keys_only=True, **kwargs)
last_only = self.meta._last_only if last_only is None else last_only
if cache_groups:
return self.to_dict(list(kwargs.keys()), process=process, last_only=last_only).get(
process=process,
last_only=last_only,
**kwargs
)
key_names = [ k for k in self.key_names if k in kwargs ]
key_values = [ kwargs[k] for k in key_names ]
return self.to_dict(key_names, process, last_only).__getitem__(tuple(key_values))
df = self[reduce(lambda a, x: a & x, [ self[k] == v for k, v in kwargs.items() ], True)]
other_key_names = [ key_name for key_name in self.key_names if key_name not in kwargs ]
if other_key_names or not process:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self,

def __getitem__(self, key: Union[Any, tuple[str]]) -> DataFrame:
keys = tuple(str(k) for k in key) if isinstance(key, tuple) else str(key)
keys = keys[0] if len(keys) == 1 else keys
return self.dict[keys] if keys in self.dict else self.df_cls()

def __repr__(self) -> str:
Expand Down
10 changes: 9 additions & 1 deletion solidago/src/solidago/state/models/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,15 @@ def input2dict(self, *args, keys_only: bool=False, **kwargs) -> dict:
args = args[:len(self.key_names)]
assert len(args) <= len(key_value_columns) + 3
assert all({ key not in key_value_columns[:len(args)] for key in kwargs })
f = lambda v, k: str(v) if k in self.key_names else v
def f(v, k):
if k in self.key_names and isinstance(v, DataFrame):
return set(v.index)
elif k in self.key_names and isinstance(v, (set, list, tuple)):
return v
elif k in self.key_names:
return str(v)
else:
return v
kwargs = { k: f(v, k) for k, v in kwargs.items() if (not keys_only or k in self.key_names) }
args_key_names = [ kn for kn in self.key_names if kn not in kwargs ]
kwargs |= { k: f(v, k) for k, v in zip(args_key_names, args[:len(args_key_names)]) }
Expand Down
Loading

0 comments on commit 3aef433

Please sign in to comment.