diff --git a/doc/source/guide/FAQ.rst b/doc/source/guide/FAQ.rst index d64cc7c8..ab17725b 100644 --- a/doc/source/guide/FAQ.rst +++ b/doc/source/guide/FAQ.rst @@ -24,7 +24,7 @@ How do I access all of the current recommendations shown in my widget? How do I set the Lux widgets to show up on default? """""""""""""""""""""""""""""""""""""""""""""""""""""""" - By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The `set_default_display` function allows users to change the setting so that the Lux widget is set as the default view for future operations on the specified dataframe: + By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The `default_display` property allows users to change the setting so that the Lux widget is set as the default view for future operations on the specified dataframe: .. code-block:: python diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 0f9fe707..ecbed649 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -253,10 +253,11 @@ def execute_2D_binning(vis: Vis): result = result.rename(columns={x_attr.attribute:"z"}) result = result[result["z"]!=0] - result.loc[:,"xBinStart"] = result["xBin"].apply(lambda x: x.left) + # convert type to facilitate weighted correlation interestingness calculation + result.loc[:,"xBinStart"] = result["xBin"].apply(lambda x: x.left).astype('float') result.loc[:,"xBinEnd"] = result["xBin"].apply(lambda x: x.right) - result.loc[:,"yBinStart"] = result["yBin"].apply(lambda x: x.left) + result.loc[:,"yBinStart"] = result["yBin"].apply(lambda x: x.left).astype('float') result.loc[:,"yBinEnd"] = result["yBin"].apply(lambda x: x.right) vis._vis_data = result.drop(columns=["xBin","yBin"]) diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py index f272f1f8..b3874ddd 100644 --- a/lux/interestingness/interestingness.py 
+++ b/lux/interestingness/interestingness.py @@ -67,7 +67,7 @@ def interestingness(vis:Vis ,ldf:LuxDataFrame) -> int: # Scatter Plot elif (n_dim == 0 and n_msr == 2): if (vis.mark=="heatmap"): - return 0.3 #TODO: Need better interestingness metric for binned scatterplots (heatmaps) + return weighted_correlation(vis.data["xBinStart"],vis.data["yBinStart"],vis.data["z"]) if (v_size<2): return -1 if (n_filter==1): v_filter_size = get_filtered_size(filter_specs, vis.data) @@ -132,6 +132,16 @@ def skewness(v): from scipy.stats import skew return skew(v) +def weighted_avg(x, w): + return np.average(x,weights=w) + +def weighted_cov(x, y, w): + return np.sum(w * (x - weighted_avg(x, w)) * (y - weighted_avg(y, w))) / np.sum(w) + +def weighted_correlation(x, y, w): + # Based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Weighted_correlation_coefficient + return weighted_cov(x, y, w) / np.sqrt(weighted_cov(x, x, w) * weighted_cov(y, y, w)) + def deviation_from_overall(vis:Vis, ldf:LuxDataFrame, filter_specs:list, msr_attribute:str) -> int: """ Difference in bar chart/histogram shape from overall chart diff --git a/lux/vislib/altair/Heatmap.py b/lux/vislib/altair/Heatmap.py index 4f7dec5a..5898e083 100644 --- a/lux/vislib/altair/Heatmap.py +++ b/lux/vislib/altair/Heatmap.py @@ -43,10 +43,10 @@ def initialize_chart(self): # self.code += f"visData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n" self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n" self.code += f''' - chart = alt.Chart(self.data).mark_rect().encode( - x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title=x_attr.attribute), bin = alt.BinParams(binned=True)), + chart = alt.Chart(visData).mark_rect().encode( + x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title='{x_attr.attribute}'), bin = alt.BinParams(binned=True)), x2=alt.X2('xBinEnd'), - y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title=y_attr.attribute), bin = 
alt.BinParams(binned=True)), + y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title='{y_attr.attribute}'), bin = alt.BinParams(binned=True)), y2=alt.Y2('yBinEnd'), #opacity = alt.Opacity('z',type='quantitative',scale=alt.Scale(type="log")) color = alt.Color('z',type='quantitative', scale=alt.Scale(scheme='blues',type="log"),legend=None)