From 3ccef78750bf21499afd6bffc6d8b07f209274a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 01:13:11 +0100 Subject: [PATCH 01/16] diffinsights_web: Add utils/humanize.py This module includes html_date_humane() and html_int_humane(), copied from 'diffinsights_web/02-contributors_graph.py'. --- src/diffinsights_web/utils/humanize.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/diffinsights_web/utils/humanize.py diff --git a/src/diffinsights_web/utils/humanize.py b/src/diffinsights_web/utils/humanize.py new file mode 100644 index 0000000..1a215b8 --- /dev/null +++ b/src/diffinsights_web/utils/humanize.py @@ -0,0 +1,24 @@ +"""Provide human-readable value, together with machine-readable HTML metadata/microdata""" +import os + +import pandas as pd + + +def html_date_humane(date: pd.Timestamp) -> str: + date_format = '%d %a %Y' + if os.name == 'nt': + date_format = '%#d %a %Y' + elif os.name == 'posix': + date_format = '%-d %a %Y' + + return f'' + + +def html_int_humane(val: int) -> str: + thousands_sep = " " # Unicode thin space (breakable in HTML),   + + res = f'{val:,}' + if thousands_sep != ",": + res = res.replace(",", thousands_sep) + + return f'{res}' From 280f02084a9ba6da0eaeb7099ff11e36825b95ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 02:02:24 +0100 Subject: [PATCH 02/16] diffinsights_web: Add RepoPlotHeader above TimelinePlot On the main part of the apps/contributors.py page, add a header with the information about type of contributions, resampling frequency, and displayed date range (this takes into account the value of "Period:" widget by checking the displayed range of the plot). This is almost straight copy of the code from 02-contributors_graph.py. 
--- src/diffinsights_web/apps/contributors.py | 15 +++++- src/diffinsights_web/views/info.py | 59 +++++++++++++++++++++-- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index 923f4de..83ec76b 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -6,7 +6,7 @@ from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir from diffinsights_web.utils.notifications import onload_callback from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum -from diffinsights_web.views.info import ContributorsHeader +from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader from diffinsights_web.views.plots.timeseries import TimeseriesPlot from diffinsights_web.widgets.caching import ClearCacheButton @@ -33,6 +33,11 @@ column_name=page_header.select_contribution_type_widget, from_date_str=page_header.select_period_from_widget, ) +timeseries_plot_header = RepoPlotHeader( + freq=data_store.resample_frequency_widget, + column_name=page_header.select_contribution_type_widget, + plot=timeseries_plot, +) # Create the dashboard layout template = pn.template.MaterialTemplate( @@ -49,7 +54,13 @@ pn.Column( page_header, ), - timeseries_plot, + pn.Card( + pn.Column( + timeseries_plot_header, + timeseries_plot, + ), + collapsible=False, hide_header=True, + ), ], ) timeline_perspective = TimelinePerspective(data_store=data_store) diff --git a/src/diffinsights_web/views/info.py b/src/diffinsights_web/views/info.py index 5474144..721510d 100644 --- a/src/diffinsights_web/views/info.py +++ b/src/diffinsights_web/views/info.py @@ -5,8 +5,15 @@ from dateutil.relativedelta import relativedelta from diffinsights_web.datastore.timeline import frequency_names +from diffinsights_web.utils.humanize import html_date_humane +from 
diffinsights_web.views.plots.timeseries import TimeseriesPlot +# common for all classes defined here +head_styles = { + 'font-size': 'larger', +} + #: for the ContributorsHeader.select_period_from_widget time_range_period = { 'All': None, @@ -64,10 +71,6 @@ class ContributorsHeader(pn.viewable.Viewer): # see table at https://pandas.pydata.org/docs/user_guide/timeseries.html#dateoffset-objects ) - head_styles = { - 'font-size': 'larger', - } - widget_top_margin = 20 widget_gap_size = 5 @@ -101,7 +104,53 @@ def __init__(self, **params): def __panel__(self): return pn.Row( - pn.pane.HTML(self.head_text_rx, styles=self.head_styles), + pn.pane.HTML(self.head_text_rx, styles=head_styles), self.select_period_from_widget, self.select_contribution_type_widget, ) + + +def sampling_info(resample_freq: str, + column: str, + frequency_names_map: dict[str, str], + min_max_date) -> str: + contribution_type = column_to_contribution.get(column, "Unknown type of contribution") + + return f""" + {contribution_type} over time +

+ {frequency_names_map.get(resample_freq, 'unknown frequency').title()}ly + from {html_date_humane(min_max_date[0])} + to {html_date_humane(min_max_date[1])} +

+ """ + + +class RepoPlotHeader(pn.viewable.Viewer): + freq = param.String( + allow_refs=True, # allow for reactive expressions, and widgets + doc="Resampling frequency as frequency string, for documentation purposes only", + # see table at https://pandas.pydata.org/docs/user_guide/timeseries.html#dateoffset-objects + ) + # allow_refs=True is here to allow widgets + column_name = param.String( + allow_refs=True, # allow for reactive expressions, and widgets + doc="Contribution type as value: column name in DataFrame, or special name", + ) + plot = param.ClassSelector(class_=TimeseriesPlot) + + def __init__(self, **params): + super().__init__(**params) + + self.sampling_info_rx = pn.rx(sampling_info)( + resample_freq=self.param.freq.rx(), + column=self.param.column_name.rx(), + frequency_names_map=frequency_names, + min_max_date=self.plot.date_range_rx, + ) + + def __panel__(self): + return pn.pane.HTML( + self.sampling_info_rx, + styles=head_styles + ) From e7a36241e6e9ee4691157578d103476b699288e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 09:40:19 +0100 Subject: [PATCH 03/16] diffinsights_web: Remove wrapping, return HoloViews in TimeseriesPlot The pn.Card that wraps both the plot header and the plot itself is now generated by the contributors.py app since the previous commit. Removing the pn.Card(pn.Column(...)) wrapping from TimeseriesPlot.__panel__() avoids having pn.Card within pn.Card, improving the look. 
--- src/diffinsights_web/views/plots/timeseries.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index 38aad03..a023975 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -270,10 +270,7 @@ def __init__(self, **params): ) def __panel__(self) -> pn.viewable.Viewable: - return pn.Card( - pn.Column( - #pn.pane.HTML(sampling_info_rx, styles=head_styles), - pn.pane.HoloViews(self.plot_commits_rx, theme=self.select_plot_theme_widget) - ), - collapsible=False, hide_header=True, + return pn.pane.HoloViews( + self.plot_commits_rx, + theme=self.select_plot_theme_widget, ) From 766d4fd7818ad734dc0243d92e6a6f3b3f16d25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 10:24:31 +0100 Subject: [PATCH 04/16] diffinsights_web: Compute " [%]" in add_pm_count_perc() This new column simply computes ("-:" + "+:")/("-:count" + "+:count") This would be a good match for any plot or indicator where we do not have space (or support) for showing both "-: [%]" and "+: [%]". It was checked manually by taking a look at the Perspective pane. 
--- src/diffinsights_web/datastore/timeline.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/diffinsights_web/datastore/timeline.py b/src/diffinsights_web/datastore/timeline.py index 4dc0665..251cd33 100644 --- a/src/diffinsights_web/datastore/timeline.py +++ b/src/diffinsights_web/datastore/timeline.py @@ -144,6 +144,25 @@ def add_pm_count_perc(resampled_df: pd.DataFrame, elif col.startswith('-:'): resampled_df.loc[:, col_perc] = resampled_df[col] / resampled_df['-:count'] + for col in pm_count_cols: + if col in {'-:count', '+:count'}: # '-:count' or '+:count' + continue + + # previous loop ensured that both "-:" and "+:" exists + if col.startswith('-:'): # we need only one of those + continue + + col_base = col[2:] # remove "+:" prefix + col_base_perc = f"{col_base} [%]" + if col_base_perc in resampled_df.columns: + # print(f" SKIP {col_base_perc}") + continue + + resampled_df.loc[:, col_base_perc] = ( + (resampled_df[f"-:{col_base}"] + resampled_df[f"+:{col_base}"]) / + (resampled_df['-:count'] + resampled_df['+:count']) + ) + #print(f" returned DataFrame(<{hex(id(resampled_df))}>)") return resampled_df From fb4a8526aa9d8621bbe54e455e6452c42f72e0c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 11:20:17 +0100 Subject: [PATCH 05/16] diffinsights_web: Introduce SpecialColumn enum This Enum is introduced to reduce code duplication, and the need to keep the special name ("KIND [%]") in sync: 3 places in 2 files. 
--- src/diffinsights_web/views/info.py | 4 ++-- src/diffinsights_web/views/plots/timeseries.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/diffinsights_web/views/info.py b/src/diffinsights_web/views/info.py index 721510d..1881239 100644 --- a/src/diffinsights_web/views/info.py +++ b/src/diffinsights_web/views/info.py @@ -6,7 +6,7 @@ from diffinsights_web.datastore.timeline import frequency_names from diffinsights_web.utils.humanize import html_date_humane -from diffinsights_web.views.plots.timeseries import TimeseriesPlot +from diffinsights_web.views.plots.timeseries import SpecialColumn, TimeseriesPlot # common for all classes defined here @@ -43,7 +43,7 @@ def time_range_options() -> dict[str, str]: "Patch size (lines)": "diff.patch_size", "Patch spreading (lines)": "diff.groups_spread", # special cases: - "Line types distribution +:[%]": "KIND [%]", + "Line types distribution +:[%]": SpecialColumn.LINE_TYPES_PERC.value, } column_to_contribution = { v: k for k, v in contribution_types_map.items() diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index a023975..0ef9196 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import Optional import pandas as pd @@ -9,6 +10,10 @@ from diffinsights_web.views import TimelineView +class SpecialColumn(Enum): + LINE_TYPES_PERC = "KIND [%]" + + @pn.cache def get_date_range(timeline_df: pd.DataFrame, from_date_str: str): # TODO: create reactive component or bound function to compute from_date to avoid recalculations @@ -108,7 +113,7 @@ def plot_commits(resampled_df: pd.DataFrame, ylim = (-1, ylim[1]) # special cases: y range limits - if column == "KIND [%]": + if column == SpecialColumn.LINE_TYPES_PERC.value: ylim = (0.0, 1.05) # via https://oklch-palette.vercel.app/ and https://htmlcolorcodes.com/rgb-to-hex/ @@ -125,7 
+130,7 @@ def plot_commits(resampled_df: pd.DataFrame, color = color_map.get(column, '#006dd8') # special cases: the plot itself - if column == "KIND [%]": + if column == SpecialColumn.LINE_TYPES_PERC.value: kind_perc_columns = [ col for col in resampled_df.columns if col.startswith('+:type.') and col.endswith(' [%]') From a17022e3b2c22411f14bf905fdcbd04e3811f46c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 13:47:00 +0100 Subject: [PATCH 06/16] diffinsights_web: Plot [%], not +:[%] line types distribution Change the name of "Contribution:" widget option from "Line types distribution +:[%]" to "Line types distribution [%]", and change the plot to show newly introduced " [%]" cumulative / stacked distribution. --- src/diffinsights_web/views/info.py | 2 +- src/diffinsights_web/views/plots/timeseries.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffinsights_web/views/info.py b/src/diffinsights_web/views/info.py index 1881239..f14dc33 100644 --- a/src/diffinsights_web/views/info.py +++ b/src/diffinsights_web/views/info.py @@ -43,7 +43,7 @@ def time_range_options() -> dict[str, str]: "Patch size (lines)": "diff.patch_size", "Patch spreading (lines)": "diff.groups_spread", # special cases: - "Line types distribution +:[%]": SpecialColumn.LINE_TYPES_PERC.value, + "Line types distribution [%]": SpecialColumn.LINE_TYPES_PERC.value, } column_to_contribution = { v: k for k, v in contribution_types_map.items() diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index 0ef9196..402a96f 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -133,7 +133,7 @@ def plot_commits(resampled_df: pd.DataFrame, if column == SpecialColumn.LINE_TYPES_PERC.value: kind_perc_columns = [ col for col in resampled_df.columns - if col.startswith('+:type.') and col.endswith(' [%]') + if col.startswith('type.') 
and col.endswith(' [%]') ] kind_perc_columns.sort(key=line_type_sorting_key) if not kind_perc_columns: From a075aa3612179895e824b69293dc92e7398aa098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 15:31:54 +0100 Subject: [PATCH 07/16] diffinsights_web: Extract perspective_pane() function in dataexplorer.py This will be used to display DataFrames that are not sub-fields of the TimelineDataStore. Extracting pn.pane.Perspective configuration into a new perspective_pane() function ensures that all Perspective-s in the tabbed interface have the same look and options. --- src/diffinsights_web/views/dataexplorer.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/diffinsights_web/views/dataexplorer.py b/src/diffinsights_web/views/dataexplorer.py index 8af2286..e33d207 100644 --- a/src/diffinsights_web/views/dataexplorer.py +++ b/src/diffinsights_web/views/dataexplorer.py @@ -1,5 +1,6 @@ from enum import Enum +import pandas as pd import panel as pn from diffinsights_web.views import TimelineView @@ -22,6 +23,16 @@ def __panel__(self): ) +def perspective_pane(df: pd.DataFrame, title: str): + return pn.pane.Perspective( + df, + title=title, + editable=False, + width_policy='max', + height=500, + ) + + class TimelinePerspective(TimelineView): def panel(self, dataframe: TimelineDataFrameEnum = TimelineDataFrameEnum.TIMELINE_DATA): if dataframe == TimelineDataFrameEnum.RESAMPLED_DATA: @@ -38,10 +49,7 @@ def panel(self, dataframe: TimelineDataFrameEnum = TimelineDataFrameEnum.TIMELIN title = pn.rx("Perspective: repo={repo!r}") \ .format(repo=self.data_store.select_repo_widget) - return pn.pane.Perspective( - df_rx, + return perspective_pane( + df=df_rx, title=title, - editable=False, - width_policy='max', - height=500, ) From 5b1df5df5c0b68e7c3dfdc8c06c6cda6f42ae798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 16:14:19 +0100 Subject: [PATCH 08/16] 
diffinsights_web: Create .authors_info_df_rx, show it in tab panel Move some functions, namely get_date_range() and filter_df_by_from_date() around to help avoid circular dependencies, so that most functions related to creating DataFrames and extracting data from them are in datastore/timeline.py. Copy authors_info_df() from 02-contributors_graph.py, and use it to create .authors_info_df_rx field in TimeseriesPlot.__init__(). Manually checked that the DataFrame looks correct in Perspective view in the tabbed widget at the bottom of the main part of the page (that was added here in this commit). . NOTE: moving get_date_range() and filter_df_by_from_date() to datastore/timeline.py, and putting authors_info_df() there instead of having them all in views/plots/timeseries.py turned out to not be necessary. Let's keep them where they are, for now. --- src/diffinsights_web/apps/contributors.py | 12 ++- src/diffinsights_web/datastore/timeline.py | 82 ++++++++++++++++++- .../views/plots/timeseries.py | 57 ++----------- 3 files changed, 99 insertions(+), 52 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index 83ec76b..54158d6 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -5,7 +5,8 @@ import diffinsights_web.utils.notifications as notifications from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir from diffinsights_web.utils.notifications import onload_callback -from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum +from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \ + perspective_pane from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader from diffinsights_web.views.plots.timeseries import TimeseriesPlot from diffinsights_web.widgets.caching import ClearCacheButton @@ -71,6 +72,15 @@ 
('data', timeline_perspective.panel(TimelineDataFrameEnum.TIMELINE_DATA)), ('resampled', timeline_perspective.panel(TimelineDataFrameEnum.RESAMPLED_DATA)), ('by author+resampled', timeline_perspective.panel(TimelineDataFrameEnum.BY_AUTHOR_DATA)), + ( + 'authors info', + perspective_pane( + df=timeseries_plot.authors_info_df_rx, + title=pn.rx("Authors info for repo={repo!r}, from={from_date!r}") \ + .format(repo=data_store.select_repo_widget, + from_date=page_header.select_period_from_widget) + ) + ) ), ]) diff --git a/src/diffinsights_web/datastore/timeline.py b/src/diffinsights_web/datastore/timeline.py index 251cd33..3403a48 100644 --- a/src/diffinsights_web/datastore/timeline.py +++ b/src/diffinsights_web/datastore/timeline.py @@ -8,7 +8,6 @@ from diffinsights_web.utils.notifications import warning_notification - DATASET_DIR = 'data/examples/stats' @@ -204,6 +203,56 @@ def resample_timeline(timeline_df: pd.DataFrame, return add_pm_count_perc(df_agg, pm_count_cols) +@pn.cache +def get_date_range(timeline_df: pd.DataFrame, from_date_str: str): + # TODO: create reactive component or bound function to compute from_date to avoid recalculations + # TODO: use parsed `from_date` instead of using raw `from_date_str` + min_date = timeline_df['author_date'].min() + if from_date_str: + from_date = pd.to_datetime(from_date_str, dayfirst=True, utc=True) + min_date = max(min_date, from_date) + + ## DEBUG + #print(f"get_date_range(timeline_df=<{hex(id(timeline_df))}, {from_date_str=}>):") + #print(f" {min_date=}, {timeline_df['author_date'].max()=}") + + return ( + min_date, + timeline_df['author_date'].max(), + ) + + +# NOTE: consider putting the filter earlier in the pipeline (needs profiling / benchmarking?) 
+# TODO: replace `from_date_str` (raw string) with `from_date` (parsed value) +def filter_df_by_from_date(resampled_df: pd.DataFrame, + from_date_str: str, + date_column: Optional[str] = None) -> pd.DataFrame: + from_date: Optional[pd.Timestamp] = None + if from_date_str: + try: + # the `from_date_str` is in DD.MM.YYYY format + from_date = pd.to_datetime(from_date_str, dayfirst=True, utc=True) + except ValueError as err: + # NOTE: should not happen, value should be validated earlier + warning_notification(f"from={from_date_str!r} is not a valid date: {err}") + + filtered_df = resampled_df + if from_date is not None: + if date_column is None: + filtered_df = resampled_df[resampled_df.index >= from_date] + else: + if pd.api.types.is_timedelta64_dtype(resampled_df[date_column]): + filtered_df = resampled_df[resampled_df[date_column] >= from_date] + elif pd.api.types.is_numeric_dtype(resampled_df[date_column]): + # assume numeric date column is UNIX timestamp + filtered_df = resampled_df[resampled_df[date_column] >= from_date.timestamp()] + else: + warning_notification(f"unsupported type {resampled_df.dtypes[date_column]!r} " + f"for column {date_column!r}") + + return filtered_df + + # mapping form display name to frequency alias # see table in https://pandas.pydata.org/docs/user_guide/timeseries.html#dateoffset-objects time_series_frequencies = { @@ -223,6 +272,37 @@ def resample_timeline(timeline_df: pd.DataFrame, } +def authors_info_df(timeline_df: pd.DataFrame, + column: str = 'n_commits', + from_date_str: str = '') -> pd.DataFrame: + info_columns = list(agg_func_mapping().keys()) + + # sanity check + if column not in info_columns: + column = info_columns[0] + + filtered_df = filter_df_by_from_date(timeline_df, from_date_str, + date_column='author.timestamp') + + df = filtered_df\ + .groupby(by='author.email')[info_columns + ['author.name']]\ + .agg({ + col: 'sum' for col in info_columns + } | { + # 
https://stackoverflow.com/questions/15222754/groupby-pandas-dataframe-and-select-most-common-value + 'author.name': pd.Series.mode, + })\ + .sort_values(by=column, ascending=False)\ + .rename(columns={ + '+:count': 'p_count', + '-:count': 'm_count', + 'author.name': 'author_name', + }) + + #print(f" -> {df.columns=}, {df.index.name=}") + return df + + class TimelineDataStore(pn.viewable.Viewer): dataset_dir = param.Foldername( constant=True, diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index 402a96f..9c4fbb4 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -6,6 +6,7 @@ import param import hvplot.pandas # noqa +from diffinsights_web.datastore.timeline import get_date_range, filter_df_by_from_date, authors_info_df from diffinsights_web.utils.notifications import warning_notification from diffinsights_web.views import TimelineView @@ -14,56 +15,6 @@ class SpecialColumn(Enum): LINE_TYPES_PERC = "KIND [%]" -@pn.cache -def get_date_range(timeline_df: pd.DataFrame, from_date_str: str): - # TODO: create reactive component or bound function to compute from_date to avoid recalculations - # TODO: use parsed `from_date` instead of using raw `from_date_str` - min_date = timeline_df['author_date'].min() - if from_date_str: - from_date = pd.to_datetime(from_date_str, dayfirst=True, utc=True) - min_date = max(min_date, from_date) - - ## DEBUG - #print(f"get_date_range(timeline_df=<{hex(id(timeline_df))}, {from_date_str=}>):") - #print(f" {min_date=}, {timeline_df['author_date'].max()=}") - - return ( - min_date, - timeline_df['author_date'].max(), - ) - - -# NOTE: consider putting the filter earlier in the pipeline (needs profiling / benchmarking?) 
-# TODO: replace `from_date_str` (raw string) with `from_date` (parsed value) -def filter_df_by_from_date(resampled_df: pd.DataFrame, - from_date_str: str, - date_column: Optional[str] = None) -> pd.DataFrame: - from_date: Optional[pd.Timestamp] = None - if from_date_str: - try: - # the `from_date_str` is in DD.MM.YYYY format - from_date = pd.to_datetime(from_date_str, dayfirst=True, utc=True) - except ValueError as err: - # NOTE: should not happen, value should be validated earlier - warning_notification(f"from={from_date_str!r} is not a valid date: {err}") - - filtered_df = resampled_df - if from_date is not None: - if date_column is None: - filtered_df = resampled_df[resampled_df.index >= from_date] - else: - if pd.api.types.is_timedelta64_dtype(resampled_df[date_column]): - filtered_df = resampled_df[resampled_df[date_column] >= from_date] - elif pd.api.types.is_numeric_dtype(resampled_df[date_column]): - # assume numeric date column is UNIX timestamp - filtered_df = resampled_df[resampled_df[date_column] >= from_date.timestamp()] - else: - warning_notification(f"unsupported type {resampled_df.dtypes[date_column]!r} " - f"for column {date_column!r}") - - return filtered_df - - def line_type_sorting_key(column_name: str) -> int: if 'type.code' in column_name: return 1 @@ -260,6 +211,12 @@ def __init__(self, **params): timeline_df=self.data_store.timeline_df_rx, from_date_str=self.param.from_date_str.rx(), ) + # authors info for authors grid selection + self.authors_info_df_rx = pn.rx(authors_info_df)( + timeline_df=self.data_store.timeline_df_rx, + column=self.param.column_name.rx(), + from_date_str=self.param.from_date_str.rx(), + ) self.select_plot_theme_widget = pn.widgets.Select( name="Plot theme:", From bfe28ebde7a0c502090e578db9ab877e6e6ecff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 17:36:49 +0100 Subject: [PATCH 09/16] diffinsights_web: Add 'selected author' pane to the tabbed widget Add the DataFrame with 
resampled information for a single selected author to the tabbed widget at the bottom of the main part of the page, via pn.pane.Perspective pane. The code is currently present in apps/contributors.py, but it will be (at least in part) extracted to a separate file. Manual exploration shows that there are entries missing (skipped periods, for example skipped months for 'ME' / 'month end' resample frequency) in this per-author DataFrame. This code was copied, with changes, from 02-contributors_graph.py. --- src/diffinsights_web/apps/contributors.py | 46 +++++++++++++++++++++- src/diffinsights_web/datastore/timeline.py | 4 ++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index 54158d6..e47df3b 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -1,9 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from typing import Optional + +import pandas as pd import panel as pn import diffinsights_web.utils.notifications as notifications -from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir +from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir, author_timeline_df from diffinsights_web.utils.notifications import onload_callback from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \ perspective_pane @@ -40,6 +43,32 @@ plot=timeseries_plot, ) + +def authors_list(authors_df: pd.DataFrame, + top_n: Optional[int] = None) -> list[str]: + # TODO: return mapping { "[name] <[email]>": "[email]",... }, + # instead of returning list of emails [ "[email]",... 
] + if top_n is None: + return authors_df.index.to_list() + else: + return authors_df.head(top_n).index.to_list() + + +# might be not a Select widget +top_n_widget = pn.widgets.Select(name="top N", options=[4,10,32], value=4) +authors_list_rx = pn.rx(authors_list)( + authors_df=timeseries_plot.authors_info_df_rx, # depends: column, from_date_str + top_n=top_n_widget, +) +select_author_widget = pn.widgets.Select( + name="author", + options=authors_list_rx, +) +author_timeline_df_rx = pn.rx(author_timeline_df)( + resample_by_author_df=data_store.resampled_timeline_by_author_rx, + author_id=select_author_widget, +) + # Create the dashboard layout template = pn.template.MaterialTemplate( site="diffannotator", @@ -80,7 +109,20 @@ .format(repo=data_store.select_repo_widget, from_date=page_header.select_period_from_widget) ) - ) + ), + ( + 'selected author', + pn.Column( + select_author_widget, + perspective_pane( + df=author_timeline_df_rx, + title=pn.rx("repo={repo!r}, author={author!r}").format( + repo=data_store.select_repo_widget, + author=select_author_widget, + ), + ), + ) + ), ), ]) diff --git a/src/diffinsights_web/datastore/timeline.py b/src/diffinsights_web/datastore/timeline.py index 3403a48..cc73594 100644 --- a/src/diffinsights_web/datastore/timeline.py +++ b/src/diffinsights_web/datastore/timeline.py @@ -203,6 +203,10 @@ def resample_timeline(timeline_df: pd.DataFrame, return add_pm_count_perc(df_agg, pm_count_cols) +def author_timeline_df(resample_by_author_df: pd.DataFrame, author_id: str) -> pd.DataFrame: + return resample_by_author_df.loc[author_id] + + @pn.cache def get_date_range(timeline_df: pd.DataFrame, from_date_str: str): # TODO: create reactive component or bound function to compute from_date to avoid recalculations From e6d7a90cde3cdd691509690e83a87231774fc23f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 17:50:06 +0100 Subject: [PATCH 10/16] diffinsights_web: Add "top N" widget to select N top authors 
The widget is now added to the apps/contributors.py sidebar. --- src/diffinsights_web/apps/contributors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index e47df3b..c955616 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -76,7 +76,10 @@ def authors_list(authors_df: pd.DataFrame, favicon="favicon.svg", sidebar=[ data_store, - pn.layout.Divider(), + top_n_widget, + + pn.layout.Divider(), # - - - - - - - - - - - - - + timeseries_plot.select_plot_theme_widget, ClearCacheButton(), ], From f668b9fc60c22ed19d714dd235be463251a8f67e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 18:33:12 +0100 Subject: [PATCH 11/16] diffinsights_web: Extract AuthorInfo class into views/authorsgrid.py Move everything related to creating 'selected author' Perspective pane to the newly created views/authorsgrid.py file. . New convention: .widgets() is a list of controlling widgets, while .__panel__() is the main view. TODO: see how it is done in "Declare UIs with Declarative API": https://panel.holoviz.org/how_to/param/index.html). 
--- src/diffinsights_web/apps/contributors.py | 45 +++------------- src/diffinsights_web/views/authorsgrid.py | 64 +++++++++++++++++++++++ 2 files changed, 70 insertions(+), 39 deletions(-) create mode 100644 src/diffinsights_web/views/authorsgrid.py diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index c955616..667868f 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -8,6 +8,7 @@ import diffinsights_web.utils.notifications as notifications from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir, author_timeline_df from diffinsights_web.utils.notifications import onload_callback +from diffinsights_web.views.authorsgrid import AuthorInfo from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \ perspective_pane from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader @@ -42,31 +43,9 @@ column_name=page_header.select_contribution_type_widget, plot=timeseries_plot, ) - - -def authors_list(authors_df: pd.DataFrame, - top_n: Optional[int] = None) -> list[str]: - # TODO: return mapping { "[name] <[email]>": "[email]",... }, - # instead of returning list of emails [ "[email]",... 
] - if top_n is None: - return authors_df.index.to_list() - else: - return authors_df.head(top_n).index.to_list() - - -# might be not a Select widget -top_n_widget = pn.widgets.Select(name="top N", options=[4,10,32], value=4) -authors_list_rx = pn.rx(authors_list)( - authors_df=timeseries_plot.authors_info_df_rx, # depends: column, from_date_str - top_n=top_n_widget, -) -select_author_widget = pn.widgets.Select( - name="author", - options=authors_list_rx, -) -author_timeline_df_rx = pn.rx(author_timeline_df)( - resample_by_author_df=data_store.resampled_timeline_by_author_rx, - author_id=select_author_widget, +authors_info_panel = AuthorInfo( + data_store=data_store, + authors_info_df=timeseries_plot.authors_info_df_rx, ) # Create the dashboard layout @@ -76,7 +55,7 @@ def authors_list(authors_df: pd.DataFrame, favicon="favicon.svg", sidebar=[ data_store, - top_n_widget, + *authors_info_panel.widgets(), pn.layout.Divider(), # - - - - - - - - - - - - - @@ -113,19 +92,7 @@ def authors_list(authors_df: pd.DataFrame, from_date=page_header.select_period_from_widget) ) ), - ( - 'selected author', - pn.Column( - select_author_widget, - perspective_pane( - df=author_timeline_df_rx, - title=pn.rx("repo={repo!r}, author={author!r}").format( - repo=data_store.select_repo_widget, - author=select_author_widget, - ), - ), - ) - ), + ('selected author', authors_info_panel), ), ]) diff --git a/src/diffinsights_web/views/authorsgrid.py b/src/diffinsights_web/views/authorsgrid.py new file mode 100644 index 0000000..9ad6d34 --- /dev/null +++ b/src/diffinsights_web/views/authorsgrid.py @@ -0,0 +1,64 @@ +from typing import Optional + +import pandas as pd +import panel as pn +import param + +from diffinsights_web.datastore.timeline import author_timeline_df +from diffinsights_web.views import TimelineView +from diffinsights_web.views.dataexplorer import perspective_pane + + +def authors_list(authors_df: pd.DataFrame, + top_n: Optional[int] = None) -> list[str]: + # TODO: return mapping 
{ "[name] <[email]>": "[email]",... }, + # instead of returning list of emails [ "[email]",... ] + if top_n is None: + return authors_df.index.to_list() + else: + return authors_df.head(top_n).index.to_list() + + +class AuthorInfo(TimelineView): + # NOTE: without `allow_refs=True`, there is a bug in 'param'(?) when trying to create warning + authors_info_df = param.ClassSelector(class_=pd.DataFrame, allow_refs=True) + + def __init__(self, **params): + super().__init__(**params) + + # might be not a Select widget + self.top_n_widget = pn.widgets.Select( + name="top N", + options=[4, 10, 32], + value=4, + ) + + self.authors_list_rx = pn.rx(authors_list)( + authors_df=self.authors_info_df, + top_n=self.top_n_widget, + ) + self.select_author_widget = pn.widgets.Select( + name="author", + options=self.authors_list_rx, + ) + self.author_timeline_df_rx = pn.rx(author_timeline_df)( + resample_by_author_df=self.data_store.resampled_timeline_by_author_rx, + author_id=self.select_author_widget, + ) + + def widgets(self) -> list[pn.viewable.Viewable]: + return [ + self.top_n_widget, + ] + + def __panel__(self) -> pn.viewable.Viewable: + return pn.Column( + self.select_author_widget, + perspective_pane( + df=self.author_timeline_df_rx, + title=pn.rx("repo={repo!r}, author={author!r}").format( + repo=self.data_store.select_repo_widget, + author=self.select_author_widget, + ), + ), + ) From 77a5b5947fdda9d0ba9e415e5ed26b41dfc0b38d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 19:31:07 +0100 Subject: [PATCH 12/16] diffinsights_web: Add utils/avatars.py Copy gravatar_url() function from 02-contributors_graph.py into this new Python file. Turn on @pn.cache for this function. 
--- src/diffinsights_web/utils/avatars.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/diffinsights_web/utils/avatars.py diff --git a/src/diffinsights_web/utils/avatars.py b/src/diffinsights_web/utils/avatars.py new file mode 100644 index 0000000..1be060f --- /dev/null +++ b/src/diffinsights_web/utils/avatars.py @@ -0,0 +1,25 @@ +import hashlib +from urllib.parse import urlencode + +import panel as pn + + +@pn.cache +def gravatar_url(email: str, size: int = 16) -> str: + # https://docs.gravatar.com/api/avatars/python/ + + # Set default parameters + # ... + + # Encode the email to lowercase and then to bytes + email_encoded = email.lower().encode('utf-8') + + # Generate the SHA256 hash of the email + email_hash = hashlib.sha256(email_encoded).hexdigest() + + # https://docs.gravatar.com/api/avatars/images/ + # Construct the URL with encoded query parameters + query_params = urlencode({'s': str(size)}) # NOTE: will be needed for 'd' parameter + url = f"https://www.gravatar.com/avatar/{email_hash}?{query_params}" + + return url From 9451e3a43e94f8f175e783602bd9bb5770c6b5cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 20:07:47 +0100 Subject: [PATCH 13/16] diffinsights_web: Add beginnings of AuthorsGrid Each of the author cards currently only includes the information about the author and their contribution, without per-author plot. The code was extracted from 02-contributors_graph.py, but instead of using pn.bind() to create partial function, @param.depends was used to automatically run update function (with some caveats). Like in 02-contributors_graph.py, first run of update function is done manually; in 02-contributors_graph.py by adding the call in the body of the script, in views/authorsgrid.py by calling it from AuthorsGrid constructor (from .__init__()). Manually checked that everything works.
--- src/diffinsights_web/apps/contributors.py | 8 +- src/diffinsights_web/datastore/timeline.py | 8 ++ src/diffinsights_web/views/authorsgrid.py | 103 ++++++++++++++++++ .../views/plots/timeseries.py | 8 +- 4 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index 667868f..d244872 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -8,7 +8,7 @@ import diffinsights_web.utils.notifications as notifications from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir, author_timeline_df from diffinsights_web.utils.notifications import onload_callback -from diffinsights_web.views.authorsgrid import AuthorInfo +from diffinsights_web.views.authorsgrid import AuthorInfo, AuthorsGrid from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \ perspective_pane from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader @@ -47,6 +47,11 @@ data_store=data_store, authors_info_df=timeseries_plot.authors_info_df_rx, ) +authors_grid = AuthorsGrid( + data_store=data_store, + authors_info_df=timeseries_plot.authors_info_df_rx, + top_n=authors_info_panel.top_n_widget, +) # Create the dashboard layout template = pn.template.MaterialTemplate( @@ -73,6 +78,7 @@ ), collapsible=False, hide_header=True, ), + authors_grid, ], ) timeline_perspective = TimelinePerspective(data_store=data_store) diff --git a/src/diffinsights_web/datastore/timeline.py b/src/diffinsights_web/datastore/timeline.py index cc73594..3521780 100644 --- a/src/diffinsights_web/datastore/timeline.py +++ b/src/diffinsights_web/datastore/timeline.py @@ -226,6 +226,14 @@ def get_date_range(timeline_df: pd.DataFrame, from_date_str: str): ) +@pn.cache +def get_value_range(timeline_df: pd.DataFrame, column: str = 'n_commits'): + return ( + timeline_df[column].min(), + 
timeline_df[column].max(), + ) + + # NOTE: consider putting the filter earlier in the pipeline (needs profiling / benchmarking?) # TODO: replace `from_date_str` (raw string) with `from_date` (parsed value) def filter_df_by_from_date(resampled_df: pd.DataFrame, diff --git a/src/diffinsights_web/views/authorsgrid.py b/src/diffinsights_web/views/authorsgrid.py index 9ad6d34..e960f53 100644 --- a/src/diffinsights_web/views/authorsgrid.py +++ b/src/diffinsights_web/views/authorsgrid.py @@ -1,3 +1,4 @@ +from collections import namedtuple from typing import Optional import pandas as pd @@ -5,6 +6,8 @@ import param from diffinsights_web.datastore.timeline import author_timeline_df +from diffinsights_web.utils.avatars import gravatar_url +from diffinsights_web.utils.humanize import html_int_humane from diffinsights_web.views import TimelineView from diffinsights_web.views.dataexplorer import perspective_pane @@ -62,3 +65,103 @@ def __panel__(self) -> pn.viewable.Viewable: ), ), ) + + +def author_info(authors_df: pd.DataFrame, author: str) -> str: + author_s: pd.Series = authors_df.loc[author] + + if not author: + return "{unknown}" + + # TODO: replace inline style with the use of `stylesheets=[stylesheet]` + # uses minus sign '−', rather than dash '-' + return f""" + {html_int_humane(author_s.loc['n_commits'])} commits + + {html_int_humane(int(author_s.loc['p_count']))} ++ + {html_int_humane(int(author_s.loc['m_count']))} −− + + """ + + +def authors_cards(authors_df: pd.DataFrame, + resample_by_author_df: pd.DataFrame, # NOTE: temporarily unused + top_n: int = 4) -> list[pn.layout.Card]: + result: list[pn.layout.Card] = [] + avatar_size = 20 + + Row = namedtuple('Pandas', ['Index', 'n_commits', 'p_count', 'm_count', 'author_name']) + row: Row + for i, row in enumerate(authors_df.head(top_n).itertuples(), start=1): + result.append( + pn.layout.Card( + pn.Column( + pn.FlexBox( + # author.name , using most common author.name + pn.pane.HTML( + '
' + f' ' + f'{row.author_name} <{row.Index}>' + '
' + ), + # position in the top N list + pn.pane.HTML(f'
#{i}
', width=20), + # FlexBox parameters: + # https://css-tricks.com/snippets/css/a-guide-to-flexbox/ + # https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_flexible_box_layout/Basic_concepts_of_flexbox + flex_direction="row", + flex_wrap="nowrap", + justify_content="space-between", + align_items="baseline", + gap="1 rem", + # layoutable parameters + sizing_mode='stretch_width', + width_policy="max", + # width=300, + # styles={"width": "100%"} + ), + pn.pane.HTML( + author_info(authors_df=authors_df, author=row.Index) + ), + ), + hide_header=True, + collapsible=False, + ) + ) + + return result + + +class AuthorsGrid(TimelineView): + authors_info_df = param.ClassSelector(class_=pd.DataFrame, allow_refs=True) + top_n = param.Integer(default=4, allow_refs=True) + + def __init__(self, **params): + super().__init__(**params) + + self.authors_grid = pn.layout.GridBox( + ncols=2, + ) + self.update_authors_grid() + + def __panel__(self) -> pn.viewable.Viewable: + return self.authors_grid + + # NOTE: cannot use 'data_store.resampled_timeline_by_author_rx' as dependency, because of + # AttributeError: Attribute 'resampled_timeline_by_author_rx' could not be resolved on + # NOTE: with `on_init=True`, it looks like this method is run before __init__, and therefore + # AttributeError: 'AuthorsGrid' object has no attribute 'authors_grid' + @param.depends('authors_info_df', 'top_n', watch=True) + def update_authors_grid(self) -> None: + ## DEBUG + print(f"RUNNING update_authors_grid(), with {self.top_n=},...") + + self.authors_grid.clear() + self.authors_grid.extend( + authors_cards( + authors_df=self.authors_info_df, + resample_by_author_df=self.data_store.resampled_timeline_by_author_rx, + top_n=self.top_n, + ) + ) diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index 9c4fbb4..c5b7d59 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -6,7 +6,8 @@ import 
param import hvplot.pandas # noqa -from diffinsights_web.datastore.timeline import get_date_range, filter_df_by_from_date, authors_info_df +from diffinsights_web.datastore.timeline import \ + get_date_range, get_value_range, filter_df_by_from_date, authors_info_df from diffinsights_web.utils.notifications import warning_notification from diffinsights_web.views import TimelineView @@ -211,6 +212,11 @@ def __init__(self, **params): timeline_df=self.data_store.timeline_df_rx, from_date_str=self.param.from_date_str.rx(), ) + self.value_range_rx = pn.rx(get_value_range)( + timeline_df=self.data_store.resampled_timeline_all_rx, + column=self.param.column_name.rx(), + ) + # authors info for authors grid selection self.authors_info_df_rx = pn.rx(authors_info_df)( timeline_df=self.data_store.timeline_df_rx, From 6abc6496f8fe00302d76b3e29c1f9f9812e250a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 22:06:19 +0100 Subject: [PATCH 14/16] diffinsights_web: Temporarily remove authors_info_panel ('selected author') It turned out that the code extracted out of 02-contributors_graph.py doesn't work with changing the input JSON file, failing with the following error: panel.reactive - Callback failed for object named 'input JSON file' changing property {'value': ...} [...] KeyError: 'mcol@posteo.net' The traceback includes the following reference to the app code: Traceback (most recent call last): [...] File "python-diff-annotator/src/diffinsights_web/datastore/timeline.py", line 207, in author_timeline_df return resample_by_author_df.loc[author_id] One can check that after changing the value of the "input JSON file" widget, the "selected author" tab in tabbed widget has its selector widget with the author from the JSON file before, and old dataframe. 
As this is not a critical component (though it would be useful for debugging), instead of taking time trying to find the source of this error - even though the code looks correct - comment out and remove this component / widget. TODO: find the source of this error, and fix the bug. --- src/diffinsights_web/apps/contributors.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index d244872..4394480 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -43,14 +43,19 @@ column_name=page_header.select_contribution_type_widget, plot=timeseries_plot, ) -authors_info_panel = AuthorInfo( - data_store=data_store, - authors_info_df=timeseries_plot.authors_info_df_rx, +#authors_info_panel = AuthorInfo( +# data_store=data_store, +# authors_info_df=timeseries_plot.authors_info_df_rx, +#) +top_n_widget = pn.widgets.Select( + name="top N", + options=[4, 10, 32], + value=4, ) authors_grid = AuthorsGrid( data_store=data_store, authors_info_df=timeseries_plot.authors_info_df_rx, - top_n=authors_info_panel.top_n_widget, + top_n=top_n_widget, ) # Create the dashboard layout @@ -60,7 +65,8 @@ favicon="favicon.svg", sidebar=[ data_store, - *authors_info_panel.widgets(), + #*authors_info_panel.widgets(), + top_n_widget, pn.layout.Divider(), # - - - - - - - - - - - - - @@ -98,7 +104,7 @@ from_date=page_header.select_period_from_widget) ) ), - ('selected author', authors_info_panel), + #('selected author', authors_info_panel), ), ]) From 47c279f31e64a3a1ae70909bc247a5c8d9c6f865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 22:58:57 +0100 Subject: [PATCH 15/16] diffinsights_web: Make authors_card() into method in AuthorsGrid With how TimeseriesPlotForAuthor is being constructed, having authors_card() be a function would mean having to pass too many parameters.
So make it a no-argument method. Also adds some slight improvements to comments, some commented-out debug-print statements, and prints more information in the debug-print statement that remains. --- src/diffinsights_web/views/authorsgrid.py | 116 +++++++++++----------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/src/diffinsights_web/views/authorsgrid.py b/src/diffinsights_web/views/authorsgrid.py index e960f53..fbd096a 100644 --- a/src/diffinsights_web/views/authorsgrid.py +++ b/src/diffinsights_web/views/authorsgrid.py @@ -84,55 +84,6 @@ def author_info(authors_df: pd.DataFrame, author: str) -> str: """ -def authors_cards(authors_df: pd.DataFrame, - resample_by_author_df: pd.DataFrame, # NOTE: temporarily unused - top_n: int = 4) -> list[pn.layout.Card]: - result: list[pn.layout.Card] = [] - avatar_size = 20 - - Row = namedtuple('Pandas', ['Index', 'n_commits', 'p_count', 'm_count', 'author_name']) - row: Row - for i, row in enumerate(authors_df.head(top_n).itertuples(), start=1): - result.append( - pn.layout.Card( - pn.Column( - pn.FlexBox( - # author.name , using most common author.name - pn.pane.HTML( - '
' - f' ' - f'{row.author_name} <{row.Index}>' - '
' - ), - # position in the top N list - pn.pane.HTML(f'
#{i}
', width=20), - # FlexBox parameters: - # https://css-tricks.com/snippets/css/a-guide-to-flexbox/ - # https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_flexible_box_layout/Basic_concepts_of_flexbox - flex_direction="row", - flex_wrap="nowrap", - justify_content="space-between", - align_items="baseline", - gap="1 rem", - # layoutable parameters - sizing_mode='stretch_width', - width_policy="max", - # width=300, - # styles={"width": "100%"} - ), - pn.pane.HTML( - author_info(authors_df=authors_df, author=row.Index) - ), - ), - hide_header=True, - collapsible=False, - ) - ) - - return result - - class AuthorsGrid(TimelineView): authors_info_df = param.ClassSelector(class_=pd.DataFrame, allow_refs=True) top_n = param.Integer(default=4, allow_refs=True) @@ -148,20 +99,69 @@ def __init__(self, **params): def __panel__(self) -> pn.viewable.Viewable: return self.authors_grid + def authors_cards(self): + result: list[pn.layout.Card] = [] + avatar_size = 20 # TODO: make it configurable, eg. via param + + # TODO: pass `field_names` or `Row` as parameters + Row = namedtuple(typename='Pandas', field_names=['Index', 'n_commits', 'p_count', 'm_count', 'author_name']) + row: Row + #print(f"{self.authors_info_df.columns=}") + for i, row in enumerate(self.authors_info_df.head(self.top_n).itertuples(), start=1): + #print(f"{i=}, {row=}") + result.append( + pn.layout.Card( + pn.Column( + pn.FlexBox( + # author.name , using most common author.name + pn.pane.HTML( + '
' + f' ' + f'{row.author_name} <{row.Index}>' + '
' + ), + # position in the top N list + pn.pane.HTML(f'
#{i}
', width=20), + # FlexBox parameters: + # https://css-tricks.com/snippets/css/a-guide-to-flexbox/ + # https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_flexible_box_layout/Basic_concepts_of_flexbox + flex_direction="row", + flex_wrap="nowrap", + justify_content="space-between", + align_items="baseline", + gap="1 rem", + # layoutable parameters + sizing_mode='stretch_width', + width_policy="max", + # width=300, + # styles={"width": "100%"} + ), + pn.pane.HTML( + author_info( + authors_df=self.authors_info_df, + author=row.Index + ) + ), + ), + hide_header=True, + collapsible=False, + ) + ) + + return result + # NOTE: cannot use 'data_store.resampled_timeline_by_author_rx' as dependency, because of - # AttributeError: Attribute 'resampled_timeline_by_author_rx' could not be resolved on + # AttributeError: Attribute 'resampled_timeline_by_author_rx' could not be resolved on # NOTE: with `on_init=True`, it looks like this method is run before __init__, and therefore - # AttributeError: 'AuthorsGrid' object has no attribute 'authors_grid' + # AttributeError: 'AuthorsGrid' object has no attribute 'authors_grid' + # NOTE: updated twice when changing JSON file, but only once when changing top_n, or contributions @param.depends('authors_info_df', 'top_n', watch=True) def update_authors_grid(self) -> None: ## DEBUG - print(f"RUNNING update_authors_grid(), with {self.top_n=},...") - + print(f"RUNNING update_authors_grid(), with repo={self.data_store.select_repo_widget.value}, top_n={self.top_n},...") + self.authors_grid.clear() self.authors_grid.extend( - authors_cards( - authors_df=self.authors_info_df, - resample_by_author_df=self.data_store.resampled_timeline_by_author_rx, - top_n=self.top_n, - ) + self.authors_cards() ) From 52774cb61ae498b868fb510e4e517c1191923406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Nar=C4=99bski?= Date: Mon, 25 Nov 2024 23:53:37 +0100 Subject: [PATCH 16/16] diffinsights_web: Add per-author plots for each author in the grid Uses 
TimeseriesPlotForAuthor class to add per-author plot (same as the main plot for the repo) for each author in the top N authors grid. NOTE that there is a known problem: the per-author dataframe can have some of its periods missing, instead of being filled with 0s or NaNs. To be fixed in some future commit, see "Un-sparseify time series" section in notebooks/panel/02-contributors_graph.ipynb --- src/diffinsights_web/apps/contributors.py | 1 + src/diffinsights_web/views/authorsgrid.py | 20 +++++++++++-- .../views/plots/timeseries.py | 30 +++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py index 4394480..64a8ddd 100644 --- a/src/diffinsights_web/apps/contributors.py +++ b/src/diffinsights_web/apps/contributors.py @@ -54,6 +54,7 @@ ) authors_grid = AuthorsGrid( data_store=data_store, + main_plot=timeseries_plot, authors_info_df=timeseries_plot.authors_info_df_rx, top_n=top_n_widget, ) diff --git a/src/diffinsights_web/views/authorsgrid.py b/src/diffinsights_web/views/authorsgrid.py index fbd096a..1965648 100644 --- a/src/diffinsights_web/views/authorsgrid.py +++ b/src/diffinsights_web/views/authorsgrid.py @@ -10,6 +10,7 @@ from diffinsights_web.utils.humanize import html_int_humane from diffinsights_web.views import TimelineView from diffinsights_web.views.dataexplorer import perspective_pane +from diffinsights_web.views.plots.timeseries import TimeseriesPlotForAuthor, TimeseriesPlot def authors_list(authors_df: pd.DataFrame, @@ -85,12 +86,18 @@ def author_info(authors_df: pd.DataFrame, author: str) -> str: class AuthorsGrid(TimelineView): - authors_info_df = param.ClassSelector(class_=pd.DataFrame, allow_refs=True) + main_plot = param.ClassSelector(class_=TimeseriesPlot, allow_refs=True) + # NOTE: needed only because of @params.depends works only with _parameters_ + # TODO: replace with a .rx.watch(...), or @pn.depends(...), or something +
authors_info_df=param.ClassSelector(class_=pd.DataFrame, allow_refs=True) top_n = param.Integer(default=4, allow_refs=True) def __init__(self, **params): + #print(f"AuthorsGrid::__init__(self, **{params=})") super().__init__(**params) + #self.authors_info_df = self.main_plot.authors_info_df_rx + self.authors_grid = pn.layout.GridBox( ncols=2, ) @@ -100,12 +107,13 @@ def __panel__(self) -> pn.viewable.Viewable: return self.authors_grid def authors_cards(self): + #print("RUNNING AuthorsGrid::authors_cards()") result: list[pn.layout.Card] = [] avatar_size = 20 # TODO: make it configurable, eg. via param # TODO: pass `field_names` or `Row` as parameters - Row = namedtuple(typename='Pandas', field_names=['Index', 'n_commits', 'p_count', 'm_count', 'author_name']) - row: Row + RowT = namedtuple(typename='Pandas', field_names=['Index', 'n_commits', 'p_count', 'm_count', 'author_name']) + row: RowT #print(f"{self.authors_info_df.columns=}") for i, row in enumerate(self.authors_info_df.head(self.top_n).itertuples(), start=1): #print(f"{i=}, {row=}") @@ -138,11 +146,17 @@ def authors_cards(self): # styles={"width": "100%"} ), pn.pane.HTML( + # TODO: pass tuple instead author_info( authors_df=self.authors_info_df, author=row.Index ) ), + TimeseriesPlotForAuthor( + data_store=self.data_store, + main_plot=self.main_plot, + author_email=row.Index, + ), ), hide_header=True, collapsible=False, diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py index c5b7d59..8107d33 100644 --- a/src/diffinsights_web/views/plots/timeseries.py +++ b/src/diffinsights_web/views/plots/timeseries.py @@ -242,3 +242,33 @@ def __panel__(self) -> pn.viewable.Viewable: self.plot_commits_rx, theme=self.select_plot_theme_widget, ) + + +class TimeseriesPlotForAuthor(TimelineView): + main_plot = param.ClassSelector(class_=TimeseriesPlot) + author_email = param.String() + + def __init__(self, **params): + #print("TimeseriesPlotForAuthor.__init__()") + 
super().__init__(**params) + + self.plot_commits_rx = pn.rx(plot_commits)( + resampled_df=self.main_plot.data_store.resampled_timeline_by_author_rx.loc[self.author_email], + column=self.main_plot.param.column_name.rx(), + from_date_str=self.main_plot.param.from_date_str.rx(), + xlim=self.main_plot.date_range_rx, + ylim=self.main_plot.value_range_rx, # TODO: allow to switch between totals, max N, and own + ) + + def __panel__(self) -> pn.viewable.Viewable: + #print("TimeseriesPlotForAuthor.__panel__()") + return pn.pane.HoloViews( + self.plot_commits_rx, + theme=self.main_plot.select_plot_theme_widget, + # sizing configuration + height=256, # TODO: find a better way than fixed height + sizing_mode='stretch_width', + # sizing_mode='scale_both', # NOTE: does not work, and neither does 'stretch_both' + # aspect_ratio=1.5, # NOTE: does not help to use 'scale_both'/'stretch_both' + margin=5, + )