ncusi · jnareb · Nov 26, 2024 · Nov 25, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/src/diffinsights_web/apps/contributors.py b/src/diffinsights_web/apps/contributors.py
@@ -1,17 +1,17 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from typing import Optional

 import pandas as pd
 import panel as pn

 import diffinsights_web.utils.notifications as notifications
 from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir, author_timeline_df
 from diffinsights_web.utils.notifications import onload_callback
 from diffinsights_web.views.authorsgrid import AuthorInfo, AuthorsGrid
 from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \
     perspective_pane
-from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader
+from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader, ContributionsPercHeader
 from diffinsights_web.views.plots.timeseries import TimeseriesPlot
 from diffinsights_web.widgets.caching import ClearCacheButton
 
@@ -43,13 +43,17 @@
     column_name=page_header.select_contribution_type_widget,
     plot=timeseries_plot,
 )
+contributions_perc_header = ContributionsPercHeader(
+    data_store=data_store,
+    from_date_str=page_header.select_period_from_widget,
+)
 #authors_info_panel = AuthorInfo(
 #    data_store=data_store,
 #    authors_info_df=timeseries_plot.authors_info_df_rx,
 #)
 top_n_widget = pn.widgets.Select(
     name="top N",
-    options=[4, 10, 32],
+    options=[2, 4, 10, 32],
     value=4,
 )
 authors_grid = AuthorsGrid(
@@ -81,6 +85,7 @@
         pn.Card(
             pn.Column(
                 timeseries_plot_header,
+                contributions_perc_header,
                 timeseries_plot,
             ),
             collapsible=False, hide_header=True,

diff --git a/src/diffinsights_web/datastore/timeline.py b/src/diffinsights_web/datastore/timeline.py
@@ -8,6 +8,7 @@
 
 from diffinsights_web.utils.notifications import warning_notification
 
+
 DATASET_DIR = 'data/examples/stats'
 
 
@@ -207,6 +208,13 @@ def author_timeline_df(resample_by_author_df: pd.DataFrame, author_id: str) -> p
     return resample_by_author_df.loc[author_id]
 
 
+def author_timeline_df_freq(resample_by_author_df: pd.DataFrame,
+                            author_id: str, resample_rate: str) -> pd.DataFrame:
+    """Return `author_id` rows conformed to `resample_rate` frequency, with gaps filled with 0."""
+    per_author_df = resample_by_author_df.loc[author_id]
+    return per_author_df.asfreq(resample_rate).fillna(0)  # NOTE: .resample(<freq>).first() would also work
+
+
 @pn.cache
 def get_date_range(timeline_df: pd.DataFrame, from_date_str: str):
     # TODO: create reactive component or bound function to compute from_date to avoid recalculations
@@ -228,6 +236,11 @@ def get_date_range(timeline_df: pd.DataFrame, from_date_str: str):
 
 @pn.cache
 def get_value_range(timeline_df: pd.DataFrame, column: str = 'n_commits'):
+    # importing SpecialColumnEnum here would create a circular dependency,
+    # therefore use a more generic solution: protect against all key errors
+    if column not in timeline_df.columns:
+        return 0.0, 1.0
+
     return (
         timeline_df[column].min(),
         timeline_df[column].max(),

diff --git a/src/diffinsights_web/views/authorsgrid.py b/src/diffinsights_web/views/authorsgrid.py
@@ -10,6 +10,7 @@
 from diffinsights_web.utils.humanize import html_int_humane
 from diffinsights_web.views import TimelineView
 from diffinsights_web.views.dataexplorer import perspective_pane
+from diffinsights_web.views.info import ContributionsPercHeader
 from diffinsights_web.views.plots.timeseries import TimeseriesPlotForAuthor, TimeseriesPlot
 
 
@@ -152,6 +153,12 @@ def authors_cards(self):
                                 author=row.Index
                             )
                         ),
+                        ContributionsPercHeader(
+                            data_store=self.data_store,
+                            from_date_str=self.main_plot.param.from_date_str.rx(),
+                            author_id=row.Index,
+                            show_descr=False,
+                        ),
                         TimeseriesPlotForAuthor(
                             data_store=self.data_store,
                             main_plot=self.main_plot,
@@ -173,7 +180,7 @@ def authors_cards(self):
     @param.depends('authors_info_df', 'top_n', watch=True)
     def update_authors_grid(self) -> None:
         ## DEBUG
-        print(f"RUNNING update_authors_grid(), with repo={self.data_store.select_repo_widget.value}, top_n={self.top_n},...")
+        #print(f"RUNNING update_authors_grid(), with repo={self.data_store.select_repo_widget.value}, top_n={self.top_n},...")
 
         self.authors_grid.clear()
         self.authors_grid.extend(

diff --git a/src/diffinsights_web/views/info.py b/src/diffinsights_web/views/info.py
@@ -1,12 +1,16 @@
 import datetime
+from collections import Counter
+from typing import Optional
 
+import pandas as pd
 import panel as pn
 import param
 from dateutil.relativedelta import relativedelta
 
-from diffinsights_web.datastore.timeline import frequency_names
+from diffinsights_web.datastore.timeline import frequency_names, filter_df_by_from_date, get_pm_count_cols
 from diffinsights_web.utils.humanize import html_date_humane
-from diffinsights_web.views.plots.timeseries import SpecialColumn, TimeseriesPlot
+from diffinsights_web.views import TimelineView
+from diffinsights_web.views.plots.timeseries import SpecialColumnEnum, TimeseriesPlot
 
 
 # common for all classes defined here
@@ -43,7 +47,8 @@ def time_range_options() -> dict[str, str]:
     "Patch size (lines)": "diff.patch_size",
     "Patch spreading (lines)": "diff.groups_spread",
     # special cases:
-    "Line types distribution [%]": SpecialColumn.LINE_TYPES_PERC.value,
+    "Line types distribution [%]": SpecialColumnEnum.LINE_TYPES_PERC.value,
+    "No plot": SpecialColumnEnum.NO_PLOT.value  # this special value should be last
 }
 column_to_contribution = {
     v: k for k, v in contribution_types_map.items()
@@ -117,7 +122,7 @@ def sampling_info(resample_freq: str,
     contribution_type = column_to_contribution.get(column, "Unknown type of contribution")
 
     return f"""
-    <strong>{contribution_type} over time</strong>
+    <strong>{contribution_type}{' over time' if column != SpecialColumnEnum.NO_PLOT.value else ''}</strong>
     <p>
     {frequency_names_map.get(resample_freq, 'unknown frequency').title()}ly
     from {html_date_humane(min_max_date[0])}
@@ -154,3 +159,134 @@ def __panel__(self):
             self.sampling_info_rx,
             styles=head_styles
         )
+
+
+def contributions_perc_info(timeline_df: pd.DataFrame,
+                            from_date_str: str,
+                            author_id: Optional[str] = None,
+                            show_descr: bool = False):
+    """Render the share of changed lines per line type as a stacked HTML bar.
+
+    Sums the '-:type.<kind>' and '+:type.<kind>' columns (out of those given by
+    get_pm_count_cols()) over rows selected by `from_date_str` and, if given,
+    `author_id`; each sum is shown as a percentage of '-:count' + '+:count'.
+    Line types not listed in `types` are folded into the "other" category.
+    If that total is zero, every percentage is reported as 0.0.
+
+    :param timeline_df: timeline data, passed to filter_df_by_from_date()
+    :param from_date_str: start of period, passed to filter_df_by_from_date()
+    :param author_id: if not None, keep only rows with this 'author.email'
+    :param show_descr: whether to append a legend with per-type percentages
+    :return: pn.pane.HTML with the bar and, optionally, the legend
+    """
+    types = ['code', 'documentation', 'test', 'data', 'markup', 'other']
+    css = """
+    .bar-container {
+        width: 100%;
+        height: 8px;
+        border-radius: 6px;
+        border: 1px solid;
+        display: flex;
+    }
+    .bar {
+        height: 6px;
+        display: block;
+        outline: 2px solid #0000;
+        padding: 1px 0px;
+    }
+    .bar-code { background-color: #4363d8; }
+    .bar-documentation { background-color: #9A6324; }
+    .bar-test { background-color: #3cb44b; }
+    .bar-data { background-color: #ffe119; }
+    .bar-markup { background-color: #800000; }
+    .bar-other { background-color: #a9a9a9; }
+    .svg-code { fill: #4363d8; }
+    .svg-documentation { fill: #9A6324; }
+    .svg-test { fill: #3cb44b; }
+    .svg-data { fill: #ffe119; }
+    .svg-markup { fill: #800000; }
+    .svg-other { fill: #a9a9a9; }
+    ul.horizontal {
+        list-style: none !important;
+        display: flex;
+        margin-left: 0px;
+        padding-left: 0rem;
+    }
+    ul.horizontal li {
+        display: inline-flex;
+        padding-right: 1rem;
+    }
+    """
+    filtered_df = filter_df_by_from_date(timeline_df, from_date_str)
+    if author_id is not None:
+        filtered_df = filtered_df[filtered_df['author.email'] == author_id]
+
+    pm_count_cols = get_pm_count_cols(timeline_df)
+    pm_count_sum = filtered_df[pm_count_cols].sum().to_dict()
+
+    line_kind_sum = Counter()
+    for col_name, col_sum in pm_count_sum.items():
+        if not col_name.startswith(('-:type.', '+:type.')):
+            continue  # not a per-line-type column
+
+        line_kind = col_name[len("+:type."):]
+        # catch every line type not in `types` into "other" category
+        line_kind_sum[line_kind if line_kind in types else "other"] += col_sum
+
+    total_lines = sum(pm_count_sum.get(f"{pm}:count", 0) for pm in "-+")
+
+    # guard against division by zero when the selection has no changed lines
+    perc = {
+        line_kind: (100.0 * line_kind_sum[line_kind] / total_lines) if total_lines else 0.0
+        for line_kind in types
+    }
+
+    html_parts = ['<div class="bar-container">']
+    for line_kind in types:
+        val_perc = perc[line_kind]
+        html_parts.append(
+            f'<span class="bar bar-{line_kind}"'
+            f' style="width: {val_perc:.1f}%;" title="{line_kind}: {val_perc:.1f}%"></span>'
+        )
+    html_parts.append('</div>')
+
+    if show_descr:
+        html_parts.append('<ul class="horizontal">')
+        for line_kind in types:
+            val_perc = perc[line_kind]
+            html_parts.append(
+                '<li>'
+                f'<svg class="svg-{line_kind}" aria-hidden="true"'
+                ' width="16" height="16" viewBox="0 0 16 16" version="1.1">'
+                '<circle cx="8" cy="8" r="4" />'
+                '</svg>'
+                f'{line_kind}:&nbsp;{val_perc:.1f}%'
+                '</li>'
+            )
+        html_parts.append('</ul>')
+
+    return pn.pane.HTML(
+        '\n'.join(html_parts),
+        stylesheets=[css],
+        sizing_mode='stretch_width',
+    )
+
+
+class ContributionsPercHeader(TimelineView):
+    """Header showing share of changed lines per line type, for repo or one author."""
+    author_id = param.String(None)
+    from_date_str = param.String(allow_refs=True)  # allow_refs=True is here to allow widgets
+    show_descr = param.Boolean(True)
+
+    def __init__(self, **params):
+        super().__init__(**params)
+        self.contributions_perc_info_rx = pn.rx(contributions_perc_info)(
+            timeline_df=self.data_store.timeline_df_rx,
+            from_date_str=self.param.from_date_str.rx(),
+            author_id=self.author_id,
+            show_descr=self.show_descr,
+        )
+
+    def __panel__(self) -> pn.viewable.Viewable:
+        # wrap the expression itself (not its `.rx.value` snapshot) so the output re-renders
+        return pn.pane.ReactiveExpr(self.contributions_perc_info_rx, show_widgets=False, sizing_mode='stretch_width')
diff --git a/src/diffinsights_web/views/plots/timeseries.py b/src/diffinsights_web/views/plots/timeseries.py
@@ -7,13 +7,14 @@
 import hvplot.pandas  # noqa
 
 from diffinsights_web.datastore.timeline import \
-    get_date_range, get_value_range, filter_df_by_from_date, authors_info_df
+    get_date_range, get_value_range, filter_df_by_from_date, authors_info_df, author_timeline_df_freq
 from diffinsights_web.utils.notifications import warning_notification
 from diffinsights_web.views import TimelineView
 
 
-class SpecialColumn(Enum):
+class SpecialColumnEnum(Enum):
     LINE_TYPES_PERC = "KIND [%]"
+    NO_PLOT = "<NO PLOT>"
 
 
 def line_type_sorting_key(column_name: str) -> int:
@@ -39,6 +40,10 @@ def plot_commits(resampled_df: pd.DataFrame,
                  xlim: Optional[tuple] = None,
                  ylim: Optional[tuple] = None,
                  kind: str = 'step'):
+    # super special case
+    if column == SpecialColumnEnum.NO_PLOT.value:
+        return
+
     filtered_df = filter_df_by_from_date(resampled_df, from_date_str)
 
     hvplot_kwargs = {}
@@ -65,7 +70,7 @@ def plot_commits(resampled_df: pd.DataFrame,
             ylim = (-1, ylim[1])
 
     # special cases: y range limits
-    if column == SpecialColumn.LINE_TYPES_PERC.value:
+    if column == SpecialColumnEnum.LINE_TYPES_PERC.value:
         ylim = (0.0, 1.05)
 
     # via https://oklch-palette.vercel.app/ and https://htmlcolorcodes.com/rgb-to-hex/
@@ -82,7 +87,7 @@ def plot_commits(resampled_df: pd.DataFrame,
     color = color_map.get(column, '#006dd8')
 
     # special cases: the plot itself
-    if column == SpecialColumn.LINE_TYPES_PERC.value:
+    if column == SpecialColumnEnum.LINE_TYPES_PERC.value:
         kind_perc_columns = [
             col for col in resampled_df.columns
             if col.startswith('type.') and col.endswith(' [%]')
@@ -238,9 +243,15 @@ def __init__(self, **params):
         )
 
     def __panel__(self) -> pn.viewable.Viewable:
+        if self.column_name == SpecialColumnEnum.NO_PLOT.value:
+            return pn.Spacer(height=0)
+
         return pn.pane.HoloViews(
             self.plot_commits_rx,
             theme=self.select_plot_theme_widget,
+            # sizing configuration
+            height=350,  # TODO: find a better way than fixed height
+            sizing_mode='stretch_width',
         )
 
 
@@ -252,8 +263,14 @@ def __init__(self, **params):
         #print("TimeseriesPlotForAuthor.__init__()")
         super().__init__(**params)
 
+        self.resampled_df_rx = pn.rx(author_timeline_df_freq)(
+            resample_by_author_df=self.main_plot.data_store.resampled_timeline_by_author_rx,
+            author_id=self.author_email,
+            resample_rate=self.data_store.resample_frequency_widget,
+        )
+
         self.plot_commits_rx = pn.rx(plot_commits)(
-            resampled_df=self.main_plot.data_store.resampled_timeline_by_author_rx.loc[self.author_email],
+            resampled_df=self.resampled_df_rx,
             column=self.main_plot.param.column_name.rx(),
             from_date_str=self.main_plot.param.from_date_str.rx(),
             xlim=self.main_plot.date_range_rx,
@@ -262,6 +279,9 @@ def __init__(self, **params):
 
     def __panel__(self) -> pn.viewable.Viewable:
         #print("TimeseriesPlotForAuthor.__panel__()")
+        if self.main_plot.column_name == SpecialColumnEnum.NO_PLOT.value:
+            return pn.Spacer(height=0)
+
         return pn.pane.HoloViews(
             self.plot_commits_rx,
             theme=self.main_plot.select_plot_theme_widget,