Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for Windows and Mac #2

Merged
merged 9 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def build_extensions(self) -> None:
# with clang++. This works across compilers (ignored by MSVC).
for extension in self.extensions:
extension.extra_compile_args.append("-std=c++11")
extension.extra_compile_args.append("-DLEMON_ONLY_TEMPLATES")

try:
build_ext.build_extensions(self)
Expand All @@ -34,6 +35,7 @@ def build_extensions(self) -> None:
# so the code can compile on macOS with Anaconda.
for extension in self.extensions:
extension.extra_compile_args.append("-stdlib=libc++")
extension.extra_compile_args.append("-DLEMON_ONLY_TEMPLATES")
extension.extra_link_args.append("-stdlib=libc++")
build_ext.build_extensions(self)

Expand Down
6 changes: 4 additions & 2 deletions fair_kmeans/_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ void postprocessData(double *array, double *sampleWeights, int *colors, uint n,
int j = 0;
for (uint i = 0; i < n * d; i += d)
{
double w = sampleWeights[j];
// In a future version, we might consider using weights as doubles
// but currently the algorithm does not allow for that
int w = static_cast<int>(sampleWeights[j]);
int c = colors[j];
number[c] += 1;
weightsum[c] += w;
Expand Down Expand Up @@ -109,7 +111,7 @@ extern "C"
uint nc,
uint maxIterations,
double tolerance,
std::uint64_t seed,
int seed,
int *labels,
double *centers,
bool updateCenters,
Expand Down
2 changes: 1 addition & 1 deletion fair_kmeans/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def _run_fair_clustering(
c_k = ctypes.c_uint(self.n_clusters)
c_n_colors = ctypes.c_uint(self.n_colors_)

c_random_state = ctypes.c_size_t(self._seed)
c_random_state = ctypes.c_int(self._seed)

c_labels = (ctypes.c_int * n_samples)()
c_centers = self.cluster_centers_.ctypes.data_as(
Expand Down
18 changes: 14 additions & 4 deletions fair_kmeans/fair_clustering_tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ std::vector<Point> computeFairlets(const std::vector<std::vector<Point>> &colore
return fairlets;
}

// Converts a real-valued sample into an integer derived from [min, max].
//
// The sample is scaled by the width of the interval (max - min), shifted by
// min, and the outcome is rounded up to the next whole number.
//
// value: the real-valued sample to convert.
// min:   offset added after scaling.
// max:   together with min, determines the scaling factor (max - min).
// Returns the rounded-up result as an int.
//
// NOTE(review): because the result is rounded up, min itself is only returned
// when the scaled sample is exactly zero. Presumably callers pass samples
// drawn from [0, 1) -- confirm whether this skew is acceptable.
int randomDoubleToInt(double value, int min, int max)
{
    const int width = max - min;
    const double scaled = (value * width) + min;
    const double roundedUp = std::ceil(scaled);
    return static_cast<int>(roundedUp);
}

// Computes the fair assignment of points to a given set of centers. Returns new centroids.
CentersValueChange compute_fair_assignment(
std::vector<std::vector<Point>> &points,
Expand Down Expand Up @@ -336,18 +342,22 @@ CentersValueChange compute_fair_assignment(
int centerindex = 0;

std::mt19937 rng(seed); // random-number engine used (Mersenne-Twister in this case)
std::uniform_int_distribution<int> uni(0, points[0].size() - 1); // guaranteed unbiased
// Replaced this because it does not produce the same results on different platforms
//std::uniform_int_distribution<int> uni(0, points[0].size() - 1); // guaranteed unbiased
std::uniform_real_distribution<double> realDist(0.0, 1.0);

# define getRandomValue() randomDoubleToInt(realDist(rng), 0, points[0].size() - 1)

// Choose k - 1 random integers between 0 and the size of the blue points
// Such that we can use them in case our clusters are empty
std::vector<Matching> matchingPairs(centers.size() - 1);

for (size_t i = 0; i < centers.size() - 1; ++i)
for (size_t i = 0; i < matchingPairs.size(); ++i)
{
auto randomInteger = uni(rng);
auto randomInteger = getRandomValue();
while (std::find_if(matchingPairs.begin(), matchingPairs.end(), find_id(randomInteger)) != matchingPairs.end())
{
randomInteger = uni(rng);
randomInteger = getRandomValue();
}
matchingPairs[i].source = randomInteger;
}
Expand Down
2 changes: 2 additions & 0 deletions fair_kmeans/point.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <vector>
#include <cstddef>

typedef unsigned int uint;

// Small class for storing weighted points
class Point
{
Expand Down
6 changes: 3 additions & 3 deletions tests/datasets/adult-balanced_10.output
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
1.27575e+10
10
8
1
56.1786,9.60714,0,859.607,32.75
55.5714,9.35714,0,859.607,30.25
39.5,12.5,99999,0,45
47,10.75,8855.5,0,45
41.8889,12.4444,12553,0,46.5
36.9618,10.0632,3.23508,0,39.1671
49.7,9.5,3841.2,0,31.6
51.4,10.2,3841.2,0,38.6
46.3667,10.8,0,2022.5,45.1
42.625,12.5,61495.9,0,39.875
38.6818,10.6364,5793.09,0,44
Expand Down
132 changes: 66 additions & 66 deletions tests/datasets/adult-balanced_100.output
Original file line number Diff line number Diff line change
@@ -1,103 +1,103 @@
1.26827e+10
11
13
1
49.1923,10.2308,0,0,34.8462
49.25,10.3214,0,0,35.5714
39.5,12.5,99999,0,45
23,10.1667,0,0,21.6667
19.25,9.375,0,0,22.375
18,5,0,0,22.5
19.5,6.75,0,0,19.5
15,6.5,0,0,22.5
18.75,6.5,0,0,20
36.5,10,3946,0,32.5
46.5,9,0,1914,40
56,10,7420.5,0,47.5
40.75,12.375,61495.9,0,41.125
19,9.33333,0,0,17.3333
11.75,4.5,0,0,20
42.8333,10.5,5195.5,0,46.8333
71.5,9.5,0,3400,42.5
51,9.5,457,0,30
18,9,0,0,20
18.5,6.5,0,0,18.75
50,7.5,457,0,32.5
22,6,0,0,30
27.25,7.75,0,0,30
34.5,9,0,1383.5,42.5
15.75,4.75,0,0,17.5
15,6.5,0,0,22.5
24.75,6,0,0,30
16.5,4.5,0,0,21.25
40.375,12,6173.62,0,44
13.5,5,0,0,20
44,11.8333,1542.83,0,46.6667
34,9,0,0,29.5
43.8333,12.1667,1542.83,0,46.6667
54.8333,10,0,2165.17,44.1667
42.1667,9.66667,0,746.25,41.1667
25.5,9.5,1087,0,32.5
42.75,9.16667,0,746.25,41.1667
55.25,10,0,933.75,36.75
50,14,12293,0,50
28.1667,10.1667,0,0,54.7667
29.5,11.75,1203.5,0,32.5
54,9.75,3340.75,0,38.25
27.9667,10.1333,0,0,54.6
38.25,10.875,1159.75,0,25.125
53.75,12,3340.75,0,40.75
53.5,9,4402,0,40
31,11,6813.5,0,40
23,5.75,0,0,20
39,8.2,1941.3,0,42.6
42.5,10,0,0,9
38.6,7.3,1941.3,0,42.6
42.25,7.5,5841,0,48
29,8,0,0,16.25
25.2,10.4,0,0,26.2
23.2917,10.75,0,0,43
20.3333,9.33333,0,0,29.3333
25.1667,10.5,0,0,25
9.5,5.25,0,0,7.75
34.5,11.5,14485.5,0,45
43.1667,12.6667,2193,0,41.6667
37.24,10.06,0,0,44.96
42.5,10.5,2193,0,41.6667
37.7292,9.91667,0,0,44.9375
42,12.25,11163.2,0,46
47,10.5,0,2043.25,41
26.5,9,4176.5,0,35
46,8.25,2589,0,41.25
50.4545,10.8636,0,0,56.1364
45.5,8.5,2589,0,41.25
50.65,11,0,0,56.65
34,13.5,4947.5,0,35.5
70,6,898.5,0,40
20,8.25,0,0,29.5
22.5,5.75,3093.75,0,18.75
46.875,9.875,0,0,88.25
27.44,9.52,0,0,39.68
24,4.25,2568.75,0,22
63.6,10.3,0,0,16.6
28.5,9.5,2325,0,32.5
53.5,12.75,0,933.75,41.25
27.14,9.18,0,0,39.52
42.5,9.25,0,852.25,46.25
62.25,11,0,0,17
30.5,9.5,2325,0,35
24.8333,9.66667,0,867.167,28.6667
44.8333,11.6667,0,1821.33,40.8333
11.25,4.5,0,0,18.75
47.9231,9.73077,0,0,45.2308
17.5,7,0,0,30
47.8333,9.63333,0,0,45.8667
41,14,0,1686,53.5
64.8636,8.27273,0,0,39.9545
39.25,10.75,0,1738.75,53.75
26.6667,7.83333,0,0,45.8333
28,12.5,0,0,9
32.7027,9.59459,0,0,38.9189
18.3,8.2,0,0,12.1
23.8333,12,0,0,16.5
27.5,3.25,0,0,20
38,12.4762,0,0,39.3571
20.8333,5.66667,0,0,40.5
42.5,10,0,0,9
67.5,8.8,0,0,5.2
29.25,4,0,0,32
27.6,11.6,0,0,13.5
32.6892,9.63514,0,0,39.2432
26.25,6.25,0,862.75,33.75
22.5833,10.3333,0,0,21.25
39.25,5.25,0,0,12.5
37.675,12.775,0,0,39.725
17.75,5.5,0,0,27
22.75,6.5,831.25,0,22.5
65.6667,8.83333,0,0,5.5
56.0278,9.30556,0,0,39.4167
17,6.5,0,0,27.5
24.8333,9.16667,0,867.167,29.1667
27,4.5,572.5,0,7
17.25,6.75,0,0,14.75
25.5,11,0,1869.5,41
45,9.1,0,0,22.6
20.7667,9.36667,0,0,39.4
37.375,11.0833,0,0,54.8333
45.9,8.8,0,0,22.6
22.5,9.72,0,0,41.16
37,11.0833,0,0,54.9167
82,8.5,0,0,48
33.7778,11.5,0,0,68.4444
44.25,10.4167,0,0,39.7083
44.375,11.15,0,0,39.85
58.2273,10.8636,0,0,46.1364
39.5,11,0,1944.5,55
24.1304,11.3913,0,0,35.5217
28.825,12.225,0,0,43.4
38.8611,9.16667,0,0,34
24.9,12.1333,0,0,35.7
28.375,11.875,0,0,43.7292
39.2619,9.09524,0,0,34.8571
59.5,7.33333,0,0,63.1667
21.5,7,1096.5,0,20
15,7.5,0,0,16.25
51.5,13.5,15024,0,40
12.25,4.5,0,0,22
59.5,9,1145,0,7
17,6,0,0,5
9,4,0,0,4
80.1667,8.83333,0,0,25.6667
15.75,5.75,0,0,21.25
19.625,8.75,0,0,6.25
32.25,12,0,0,20.75
34,8,0,0,27.1667
63.6,7.5,0,0,30.2
22,4,0,0,44.75
19.6,10.1,0,0,16.7
19.5833,9.08333,0,0,8.83333
18.25,8.875,0,0,23
29,12,0,0,31.8333
65.1,7.2,0,0,29.2
19.7,9.25,0,0,35.7
30.6667,12.5,0,0,94.5
16,4.75,0,0,26.25
15.5,5.25,0,0,19
42.75,7.25,0,852.25,46.25
18.25,4,977,0,21.5
32,7.5,0,0,17
19.25,6.75,0,0,20
60 changes: 30 additions & 30 deletions tests/datasets/adult-balanced_50.output
Original file line number Diff line number Diff line change
@@ -1,53 +1,53 @@
1.26829e+10
20
1.26828e+10
25
1
68.4667,9.2,0,0,16.1333
68.8333,9.03333,0,0,16.6333
39.5,12.5,99999,0,45
24.4,10.55,0,0,21.9
20.2857,10.0714,0,0,16.5714
52,14,15024,0,50
40.75,12.375,61495.9,0,41.125
36.5,10,3946,0,32.5
39.1,11,0,1849.5,40.7
20.5,10.4286,0,0,15.5714
18.9167,8.41667,0,0,7.58333
25.471,10.5145,0,0,41.2681
38.4286,10.3286,0,0,35.6714
17.25,6.75,0,0,14.75
21.7679,9.80357,0,0,35.1964
42.1667,12.3333,11163.2,0,44.3333
71.5,9.5,0,3400,42.5
40.375,12,6173.62,0,44
16.5,5,0,0,21.5
56,9.5,457,0,35
27.8772,10.5702,0,0,39.6754
52.5,10.5,457,0,35
34.5,11.5,14485.5,0,45
43.2,9.3,1941.3,0,45.3
17.25,6.75,0,0,14.75
39.4,8.4,1941.3,0,42.6
27.6,9.65,0,0,29.35
42.8333,10.5,5195.5,0,46.8333
56,10,7420.5,0,47.5
16.5,5.5,0,0,20
18.9167,8.41667,0,0,7.58333
48.5,13,12293,0,50
46.8333,11.3333,2193,0,41.6667
57.25,7.25,3340.75,0,49.75
44.5,9.16667,1184,0,20.8333
39,9.65385,0,819.308,37.7308
48,11.5,2193,0,40.6667
55.5,7.5,3340.75,0,52
38.125,10.125,1159.75,0,25.75
46.1667,8.5,0,746.25,41.1667
54.8333,10,0,2165.17,44.1667
33.373,10.4603,0,0,41.0952
35.0278,10.5,0,0,42.3704
53.5,9,4402,0,40
29,11.5,2325,0,36.5
30.5,9.5,2325,0,37.5
43.5,11.8333,0,1721.17,53.6667
31,11,6813.5,0,40
47,12,1542.83,0,50
21.3043,9.34783,0,0,34.4783
42.0357,10.4214,0,0,42.1786
47.25,9,2589,0,41.25
48,11.3333,1542.83,0,50.6667
18.25,5.5,0,0,18
35.3333,11.2879,0,0,58
47.25,9,2589,0,36.875
42.25,7.5,5841,0,48
44.5,10.6667,0,2010.33,45.6667
48.1731,10.0577,0,0,32.6346
25.5,11,1087,0,35
18.7857,8.92857,0,0,25
33.8333,10,0,0,14
31.6316,9.02632,0,0,32.4474
45.4151,10.4057,0,0,42.283
36.6429,8.07143,0,881.929,39.4286
48,9.2,0,0,23.25
28.625,11.0625,0,0,14.1875
20.4545,9.40909,0,0,24.0909
34,13.5,4947.5,0,35.5
26.5,9,4176.5,0,35
33.6618,10.9559,0,0,58.2206
58.6176,9.38235,0,0,43.6275
24.1552,9.98276,0,0,45.1379
58.97,9.36,0,0,42.98
39,10.5,0,1383.5,45
40.1111,10.5,0,0,87.3333
38.5,10.6111,0,0,87.6111
79.5,8.5,898.5,0,40
Loading
Loading