Skip to content

Commit

Permalink
added weighted correlation as the interestingness metric for heatmap
Browse files Browse the repository at this point in the history
* bug fix heatmap export plot code
* minor typo `default_display` in FAQ.rst
  • Loading branch information
dorisjlee committed Sep 29, 2020
1 parent aea2bc3 commit 70c1f14
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/source/guide/FAQ.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ How do I access all of the current recommendations shown in my widget?

How do I set the Lux widgets to show up by default?
""""""""""""""""""""""""""""""""""""""""""""""""""""""""
By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The `set_default_display` function allows users to change the setting so that the Lux widget is set as the default view for future operations on the specified dataframe:
By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The `default_display` property allows users to change the setting so that the Lux widget is set as the default view for future operations on the specified dataframe:

.. code-block:: python
Expand Down
5 changes: 3 additions & 2 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,11 @@ def execute_2D_binning(vis: Vis):
result = result.rename(columns={x_attr.attribute:"z"})
result = result[result["z"]!=0]

result.loc[:,"xBinStart"] = result["xBin"].apply(lambda x: x.left)
# convert type to facilitate weighted correlation interestingness calculation
result.loc[:,"xBinStart"] = result["xBin"].apply(lambda x: x.left).astype('float')
result.loc[:,"xBinEnd"] = result["xBin"].apply(lambda x: x.right)

result.loc[:,"yBinStart"] = result["yBin"].apply(lambda x: x.left)
result.loc[:,"yBinStart"] = result["yBin"].apply(lambda x: x.left).astype('float')
result.loc[:,"yBinEnd"] = result["yBin"].apply(lambda x: x.right)

vis._vis_data = result.drop(columns=["xBin","yBin"])
Expand Down
12 changes: 11 additions & 1 deletion lux/interestingness/interestingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def interestingness(vis:Vis ,ldf:LuxDataFrame) -> int:
# Scatter Plot
elif (n_dim == 0 and n_msr == 2):
if (vis.mark=="heatmap"):
return 0.3 #TODO: Need better interestingness metric for binned scatterplots (heatmaps)
return weighted_correlation(vis.data["xBinStart"],vis.data["yBinStart"],vis.data["z"])
if (v_size<2): return -1
if (n_filter==1):
v_filter_size = get_filtered_size(filter_specs, vis.data)
Expand Down Expand Up @@ -132,6 +132,16 @@ def skewness(v):
from scipy.stats import skew
return skew(v)

def weighted_avg(x, w):
    """Return the mean of ``x`` weighted by ``w``.

    Parameters
    ----------
    x : array-like
        Values to average.
    w : array-like
        Weight for each value in ``x`` (same length as ``x``).

    Raises
    ------
    ZeroDivisionError
        Propagated from ``np.average`` when the weights sum to zero.
    """
    return np.average(x, weights=w)


def weighted_cov(x, y, w):
    """Return the weighted covariance of ``x`` and ``y`` under weights ``w``.

    Computed as ``sum(w * (x - mx) * (y - my)) / sum(w)`` where ``mx`` and
    ``my`` are the weighted means of ``x`` and ``y``.

    Parameters
    ----------
    x, y : array-like
        Paired observations (same length).
    w : array-like
        Weight for each (x, y) pair.
    """
    # Coerce to float arrays so that lists and object-dtype columns work, and
    # so that elements are always paired by position (np.average already
    # treats its inputs positionally).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    x_dev = x - weighted_avg(x, w)
    y_dev = y - weighted_avg(y, w)
    return np.sum(w * x_dev * y_dev) / np.sum(w)


def weighted_correlation(x, y, w):
    """Return the weighted Pearson correlation coefficient of ``x`` and ``y``.

    Based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Weighted_correlation_coefficient

    Parameters
    ----------
    x, y : array-like
        Paired observations (same length).
    w : array-like
        Weight for each (x, y) pair.

    Returns
    -------
    float
        The weighted covariance of ``x`` and ``y`` divided by the square root
        of the product of their weighted variances, or ``0.0`` when either
        weighted variance is exactly zero (the coefficient is undefined there).
    """
    cov_xy = weighted_cov(x, y, w)
    denominator = np.sqrt(weighted_cov(x, x, w) * weighted_cov(y, y, w))
    # A constant x or y (e.g. all weight in a single bin row/column) has zero
    # variance; dividing would yield NaN and a RuntimeWarning, so report
    # "no linear relationship" instead.
    if denominator == 0:
        return 0.0
    return cov_xy / denominator

def deviation_from_overall(vis:Vis, ldf:LuxDataFrame, filter_specs:list, msr_attribute:str) -> int:
"""
Difference in bar chart/histogram shape from overall chart
Expand Down
6 changes: 3 additions & 3 deletions lux/vislib/altair/Heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def initialize_chart(self):
# self.code += f"visData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
self.code += f'''
chart = alt.Chart(self.data).mark_rect().encode(
x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title=x_attr.attribute), bin = alt.BinParams(binned=True)),
chart = alt.Chart(visData).mark_rect().encode(
x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title='{x_attr.attribute}'), bin = alt.BinParams(binned=True)),
x2=alt.X2('xBinEnd'),
y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title=y_attr.attribute), bin = alt.BinParams(binned=True)),
y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title='{y_attr.attribute}'), bin = alt.BinParams(binned=True)),
y2=alt.Y2('yBinEnd'),
#opacity = alt.Opacity('z',type='quantitative',scale=alt.Scale(type="log"))
color = alt.Color('z',type='quantitative', scale=alt.Scale(scheme='blues',type="log"),legend=None)
Expand Down

0 comments on commit 70c1f14

Please sign in to comment.