-
Notifications
You must be signed in to change notification settings - Fork 73
/
mmd.py
150 lines (111 loc) · 4.93 KB
/
mmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
'''
MMD functions implemented in tensorflow.
'''
from __future__ import division
import tensorflow as tf
from tf_ops import dot, sq_sum
_eps=1e-8
################################################################################
### Quadratic-time MMD with Gaussian RBF kernel
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
if wts is None:
wts = [1] * len(sigmas)
XX = tf.matmul(X, X, transpose_b=True)
XY = tf.matmul(X, Y, transpose_b=True)
YY = tf.matmul(Y, Y, transpose_b=True)
X_sqnorms = tf.diag_part(XX)
Y_sqnorms = tf.diag_part(YY)
r = lambda x: tf.expand_dims(x, 0)
c = lambda x: tf.expand_dims(x, 1)
K_XX, K_XY, K_YY = 0, 0, 0
for sigma, wt in zip(sigmas, wts):
gamma = 1 / (2 * sigma**2)
K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))
return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
def rbf_mmd2(X, Y, sigma=1, biased=True):
return mix_rbf_mmd2(X, Y, sigmas=[sigma], biased=biased)
def mix_rbf_mmd2(X, Y, sigmas=(1,), wts=None, biased=True):
K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts)
return _mmd2(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased)
def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
return mix_rbf_mmd2_and_ratio(X, Y, sigmas=[sigma], biased=biased)
def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts)
return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased)
################################################################################
### Helper functions to compute variances based on kernel matrices
def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
m = tf.cast(K_XX.get_shape()[0], tf.float32)
n = tf.cast(K_YY.get_shape()[0], tf.float32)
if biased:
mmd2 = (tf.reduce_sum(K_XX) / (m * m)
+ tf.reduce_sum(K_YY) / (n * n)
- 2 * tf.reduce_sum(K_XY) / (m * n))
else:
if const_diagonal is not False:
trace_X = m * const_diagonal
trace_Y = n * const_diagonal
else:
trace_X = tf.trace(K_XX)
trace_Y = tf.trace(K_YY)
mmd2 = ((tf.reduce_sum(K_XX) - trace_X) / (m * (m - 1))
+ (tf.reduce_sum(K_YY) - trace_Y) / (n * (n - 1))
- 2 * tf.reduce_sum(K_XY) / (m * n))
return mmd2
def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
min_var_est=_eps):
mmd2, var_est = _mmd2_and_variance(
K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
ratio = mmd2 / tf.sqrt(tf.maximum(var_est, min_var_est))
return mmd2, ratio
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
m = tf.cast(K_XX.get_shape()[0], tf.float32) # Assumes X, Y are same shape
### Get the various sums of kernels that we'll use
# Kts drop the diagonal, but we don't need to compute them explicitly
if const_diagonal is not False:
const_diagonal = tf.cast(const_diagonal, tf.float32)
diag_X = diag_Y = const_diagonal
sum_diag_X = sum_diag_Y = m * const_diagonal
sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
else:
diag_X = tf.diag_part(K_XX)
diag_Y = tf.diag_part(K_YY)
sum_diag_X = tf.reduce_sum(diag_X)
sum_diag_Y = tf.reduce_sum(diag_Y)
sum_diag2_X = sq_sum(diag_X)
sum_diag2_Y = sq_sum(diag_Y)
Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
K_XY_sums_1 = tf.reduce_sum(K_XY, 1)
Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
K_XY_sum = tf.reduce_sum(K_XY_sums_0)
Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
K_XY_2_sum = sq_sum(K_XY)
if biased:
mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
+ (Kt_YY_sum + sum_diag_Y) / (m * m)
- 2 * K_XY_sum / (m * m))
else:
mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
+ (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
- 2 * K_XY_sum / (m * m))
var_est = (
2 / (m**2 * (m-1)**2) * (
2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
+ 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
- (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
+ 4*(m-2) / (m**3 * (m-1)**2) * (
sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
- 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
- (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
+ 8 / (m**3 * (m-1)) * (
1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
- dot(Kt_XX_sums, K_XY_sums_1)
- dot(Kt_YY_sums, K_XY_sums_0))
)
return mmd2, var_est