Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Visualization: Add line types split indicator, similar to languages split indicator on GitHub #55

Merged
merged 7 commits into from
Nov 26, 2024
9 changes: 7 additions & 2 deletions src/diffinsights_web/apps/contributors.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from typing import Optional

Check failure on line 3 in src/diffinsights_web/apps/contributors.py

View workflow job for this annotation

GitHub Actions / build

Ruff (F401)

src/diffinsights_web/apps/contributors.py:3:20: F401 `typing.Optional` imported but unused

import pandas as pd

Check failure on line 5 in src/diffinsights_web/apps/contributors.py

View workflow job for this annotation

GitHub Actions / build

Ruff (F401)

src/diffinsights_web/apps/contributors.py:5:18: F401 `pandas` imported but unused
import panel as pn

import diffinsights_web.utils.notifications as notifications
from diffinsights_web.datastore.timeline import TimelineDataStore, find_dataset_dir, author_timeline_df

Check failure on line 9 in src/diffinsights_web/apps/contributors.py

View workflow job for this annotation

GitHub Actions / build

Ruff (F401)

src/diffinsights_web/apps/contributors.py:9:86: F401 `diffinsights_web.datastore.timeline.author_timeline_df` imported but unused
from diffinsights_web.utils.notifications import onload_callback
from diffinsights_web.views.authorsgrid import AuthorInfo, AuthorsGrid

Check failure on line 11 in src/diffinsights_web/apps/contributors.py

View workflow job for this annotation

GitHub Actions / build

Ruff (F401)

src/diffinsights_web/apps/contributors.py:11:48: F401 `diffinsights_web.views.authorsgrid.AuthorInfo` imported but unused
from diffinsights_web.views.dataexplorer import TimelineJSONViewer, TimelinePerspective, TimelineDataFrameEnum, \
perspective_pane
from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader
from diffinsights_web.views.info import ContributorsHeader, RepoPlotHeader, ContributionsPercHeader
from diffinsights_web.views.plots.timeseries import TimeseriesPlot
from diffinsights_web.widgets.caching import ClearCacheButton

Expand Down Expand Up @@ -43,13 +43,17 @@
column_name=page_header.select_contribution_type_widget,
plot=timeseries_plot,
)
contributions_perc_header = ContributionsPercHeader(
data_store=data_store,
from_date_str=page_header.select_period_from_widget,
)
#authors_info_panel = AuthorInfo(
# data_store=data_store,
# authors_info_df=timeseries_plot.authors_info_df_rx,
#)
top_n_widget = pn.widgets.Select(
name="top N",
options=[4, 10, 32],
options=[2, 4, 10, 32],
value=4,
)
authors_grid = AuthorsGrid(
Expand Down Expand Up @@ -81,6 +85,7 @@
pn.Card(
pn.Column(
timeseries_plot_header,
contributions_perc_header,
timeseries_plot,
),
collapsible=False, hide_header=True,
Expand Down
13 changes: 13 additions & 0 deletions src/diffinsights_web/datastore/timeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from diffinsights_web.utils.notifications import warning_notification


DATASET_DIR = 'data/examples/stats'


Expand Down Expand Up @@ -207,6 +208,13 @@ def author_timeline_df(resample_by_author_df: pd.DataFrame, author_id: str) -> p
return resample_by_author_df.loc[author_id]


def author_timeline_df_freq(resample_by_author_df: pd.DataFrame,
                            author_id: str,
                            resample_rate: str) -> pd.DataFrame:
    """Return one author's timeline on a regular time grid, gaps filled with 0.

    :param resample_by_author_df: dataframe from which the rows of a single
        author can be selected with ``.loc[author_id]``
        (presumably indexed by author id first, then by date - confirm
        against the caller)
    :param author_id: label of the author to select
    :param resample_rate: frequency string passed to ``DataFrame.asfreq()``
    :return: the selected author's dataframe converted to `resample_rate`
        frequency, with every missing value replaced by 0
    """
    single_author_df = resample_by_author_df.loc[author_id]
    # NOTE: .resample(<freq>).first() can be used in place of .asfreq(<freq>)
    regular_grid_df = single_author_df.asfreq(resample_rate)
    return regular_grid_df.fillna(0)


@pn.cache
def get_date_range(timeline_df: pd.DataFrame, from_date_str: str):
# TODO: create reactive component or bound function to compute from_date to avoid recalculations
Expand All @@ -228,6 +236,11 @@ def get_date_range(timeline_df: pd.DataFrame, from_date_str: str):

@pn.cache
def get_value_range(timeline_df: pd.DataFrame, column: str = 'n_commits'):
# problems importing SpecialColumnEnum - circular dependency
# therefore use more generic solution: protect against all key errors
if column not in timeline_df.columns:
return 0.0, 1.0

return (
timeline_df[column].min(),
timeline_df[column].max(),
Expand Down
9 changes: 8 additions & 1 deletion src/diffinsights_web/views/authorsgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from diffinsights_web.utils.humanize import html_int_humane
from diffinsights_web.views import TimelineView
from diffinsights_web.views.dataexplorer import perspective_pane
from diffinsights_web.views.info import ContributionsPercHeader
from diffinsights_web.views.plots.timeseries import TimeseriesPlotForAuthor, TimeseriesPlot


Expand Down Expand Up @@ -152,6 +153,12 @@ def authors_cards(self):
author=row.Index
)
),
ContributionsPercHeader(
data_store=self.data_store,
from_date_str=self.main_plot.param.from_date_str.rx(),
author_id=row.Index,
show_descr=False,
),
TimeseriesPlotForAuthor(
data_store=self.data_store,
main_plot=self.main_plot,
Expand All @@ -173,7 +180,7 @@ def authors_cards(self):
@param.depends('authors_info_df', 'top_n', watch=True)
def update_authors_grid(self) -> None:
## DEBUG
print(f"RUNNING update_authors_grid(), with repo={self.data_store.select_repo_widget.value}, top_n={self.top_n},...")
#print(f"RUNNING update_authors_grid(), with repo={self.data_store.select_repo_widget.value}, top_n={self.top_n},...")

self.authors_grid.clear()
self.authors_grid.extend(
Expand Down
144 changes: 140 additions & 4 deletions src/diffinsights_web/views/info.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import datetime
from collections import Counter
from typing import Optional

import pandas as pd
import panel as pn
import param
from dateutil.relativedelta import relativedelta

from diffinsights_web.datastore.timeline import frequency_names
from diffinsights_web.datastore.timeline import frequency_names, filter_df_by_from_date, get_pm_count_cols
from diffinsights_web.utils.humanize import html_date_humane
from diffinsights_web.views.plots.timeseries import SpecialColumn, TimeseriesPlot
from diffinsights_web.views import TimelineView
from diffinsights_web.views.plots.timeseries import SpecialColumnEnum, TimeseriesPlot


# common for all classes defined here
Expand Down Expand Up @@ -43,7 +47,8 @@ def time_range_options() -> dict[str, str]:
"Patch size (lines)": "diff.patch_size",
"Patch spreading (lines)": "diff.groups_spread",
# special cases:
"Line types distribution [%]": SpecialColumn.LINE_TYPES_PERC.value,
"Line types distribution [%]": SpecialColumnEnum.LINE_TYPES_PERC.value,
"No plot": SpecialColumnEnum.NO_PLOT.value # this special value should be last
}
column_to_contribution = {
v: k for k, v in contribution_types_map.items()
Expand Down Expand Up @@ -117,7 +122,7 @@ def sampling_info(resample_freq: str,
contribution_type = column_to_contribution.get(column, "Unknown type of contribution")

return f"""
<strong>{contribution_type} over time</strong>
<strong>{contribution_type}{' over time' if column != SpecialColumnEnum.NO_PLOT.value else ''}</strong>
<p>
{frequency_names_map.get(resample_freq, 'unknown frequency').title()}ly
from {html_date_humane(min_max_date[0])}
Expand Down Expand Up @@ -154,3 +159,134 @@ def __panel__(self):
self.sampling_info_rx,
styles=head_styles
)


def contributions_perc_info(timeline_df: pd.DataFrame,
                            from_date_str: str,
                            author_id: Optional[str] = None,
                            show_descr: bool = False):
    """Build an HTML bar showing how changed lines split between line types.

    The timeline is first restricted with `filter_df_by_from_date()` and,
    when `author_id` is given, to rows whose 'author.email' equals it.
    The columns selected by `get_pm_count_cols()` are then summed, and
    the '-:type.<kind>' / '+:type.<kind>' sums are added together per kind.
    Any '[-+]:type.<kind>' column whose kind is not one of the six known
    kinds is counted as "other".  Percentages are taken relative to the sum
    of the '-:count' and '+:count' columns.

    If there are no counted lines at all (for example, the selected author
    has no changes in the selected period), every percentage is reported
    as 0.0 instead of dividing by zero.

    :param timeline_df: timeline data with per-commit line counts
    :param from_date_str: start of the period, passed on unchanged
        to `filter_df_by_from_date()`
    :param author_id: if not None, only rows with this 'author.email' value
        are taken into account
    :param show_descr: whether to append a legend listing each kind
        of line together with its percentage
    :return: `pn.pane.HTML` pane with the bar (and the optional legend)
    """
    types = [
        'code',
        'documentation',
        'test',
        'data',
        'markup',
        'other'
    ]
    css = """
.bar-container {
width: 100%;
height: 8px;
border-radius: 6px;
border: 1px solid;
display: flex;
}
.bar {
height: 6px;
display: block;
outline: 2px solid #0000;
padding: 1px 0px;
}
.bar-code { background-color: #4363d8; }
.bar-documentation { background-color: #9A6324; }
.bar-test { background-color: #3cb44b; }
.bar-data { background-color: #ffe119; }
.bar-markup { background-color: #800000; }
.bar-other { background-color: #a9a9a9; }
.svg-code { fill: #4363d8; }
.svg-documentation { fill: #9A6324; }
.svg-test { fill: #3cb44b; }
.svg-data { fill: #ffe119; }
.svg-markup { fill: #800000; }
.svg-other { fill: #a9a9a9; }
ul.horizontal {
list-style: none !important;
display: flex;
margin-left: 0px;
padding-left: 0rem;
}
ul.horizontal li {
display: inline-flex;
padding-right: 1rem;
}
"""
    filtered_df = filter_df_by_from_date(timeline_df, from_date_str)
    if author_id is not None:
        filtered_df = filtered_df[filtered_df['author.email'] == author_id]

    pm_count_cols = get_pm_count_cols(timeline_df)
    pm_count_sum = filtered_df[pm_count_cols].sum().to_dict()

    # removed ('-') plus added ('+') lines, for each known kind of line;
    # a missing column simply contributes nothing
    line_kind_sum = Counter()
    for line_kind in types:
        for pm in "-+":
            line_kind_sum[line_kind] += pm_count_sum.get(f"{pm}:type.{line_kind}", 0)

    # catch every line type not in `types` into "other" category
    for col_name, col_sum in pm_count_sum.items():
        if not col_name.startswith(('-:type.', '+:type.')):
            continue  # not a per-type column (e.g. '+:count')
        if col_name[len("+:type."):] in types:
            continue  # already counted
        line_kind_sum["other"] += col_sum

    total_lines = sum(pm_count_sum.get(f"{pm}:count", 0) for pm in "-+")

    # guard against division by zero: no counted lines means 0% everywhere
    percentages = {
        line_kind: (100.0 * line_kind_sum[line_kind] / total_lines
                    if total_lines > 0 else 0.0)
        for line_kind in types
    }

    html_parts = ['<div class="bar-container">']
    for line_kind in types:
        val_perc = percentages[line_kind]
        html_parts.append(
            f'<span class="bar bar-{line_kind}"'
            f' style="width: {val_perc:.1f}%;" title="{line_kind}: {val_perc:.1f}%"></span>'
        )
    html_parts.append('</div>')

    if show_descr:
        html_parts.append('<ul class="horizontal">')
        for line_kind in types:
            val_perc = percentages[line_kind]
            html_parts.append(
                '<li>'
                f'<svg class="svg-{line_kind}" aria-hidden="true"'
                ' width="16" height="16" viewBox="0 0 16 16" version="1.1">'
                '<circle cx="8" cy="8" r="4" />'
                '</svg>'
                f'{line_kind}:&nbsp;{val_perc:.1f}%'
                '</li>'
            )
        html_parts.append('</ul>')

    return pn.pane.HTML(
        '\n'.join(html_parts),
        stylesheets=[css],
        sizing_mode='stretch_width',
    )


class ContributionsPercHeader(TimelineView):
    """View showing the line types split bar built by `contributions_perc_info()`.

    The bar is computed from the `timeline_df_rx` of the view's data store,
    for the period starting at `from_date_str`, optionally limited to
    a single author.
    """
    # if set, only contributions of this author are taken into account
    author_id = param.String(None)
    # allow_refs=True is here to allow widgets
    from_date_str = param.String(allow_refs=True)
    # whether to show the legend with percentages below the bar
    show_descr = param.Boolean(True)

    def __init__(self, **params):
        super().__init__(**params)

        # TODO: fix the bug with the output not updating on updated `from_date_str` widget
        # NOTE(review): __panel__() returns the current `.rx.value`,
        # which may be why the output is not refreshed - to be confirmed
        reactive_perc_info = pn.rx(contributions_perc_info)
        self.contributions_perc_info_rx = reactive_perc_info(
            timeline_df=self.data_store.timeline_df_rx,
            from_date_str=self.param.from_date_str.rx(),
            author_id=self.author_id,
            show_descr=self.show_descr,
        )

    def __panel__(self) -> pn.viewable.Viewable:
        current_pane = self.contributions_perc_info_rx.rx.value
        return current_pane
30 changes: 25 additions & 5 deletions src/diffinsights_web/views/plots/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
import hvplot.pandas # noqa

from diffinsights_web.datastore.timeline import \
get_date_range, get_value_range, filter_df_by_from_date, authors_info_df
get_date_range, get_value_range, filter_df_by_from_date, authors_info_df, author_timeline_df_freq
from diffinsights_web.utils.notifications import warning_notification
from diffinsights_web.views import TimelineView


class SpecialColumn(Enum):
class SpecialColumnEnum(Enum):
    """Column names that get special treatment instead of being plotted as-is."""
    # plot the distribution of line types, in percents
    LINE_TYPES_PERC = "KIND [%]"
    # do not create any plot at all
    NO_PLOT = "<NO PLOT>"


def line_type_sorting_key(column_name: str) -> int:
Expand All @@ -39,6 +40,10 @@ def plot_commits(resampled_df: pd.DataFrame,
xlim: Optional[tuple] = None,
ylim: Optional[tuple] = None,
kind: str = 'step'):
# super special case
if column == SpecialColumnEnum.NO_PLOT.value:
return

filtered_df = filter_df_by_from_date(resampled_df, from_date_str)

hvplot_kwargs = {}
Expand All @@ -65,7 +70,7 @@ def plot_commits(resampled_df: pd.DataFrame,
ylim = (-1, ylim[1])

# special cases: y range limits
if column == SpecialColumn.LINE_TYPES_PERC.value:
if column == SpecialColumnEnum.LINE_TYPES_PERC.value:
ylim = (0.0, 1.05)

# via https://oklch-palette.vercel.app/ and https://htmlcolorcodes.com/rgb-to-hex/
Expand All @@ -82,7 +87,7 @@ def plot_commits(resampled_df: pd.DataFrame,
color = color_map.get(column, '#006dd8')

# special cases: the plot itself
if column == SpecialColumn.LINE_TYPES_PERC.value:
if column == SpecialColumnEnum.LINE_TYPES_PERC.value:
kind_perc_columns = [
col for col in resampled_df.columns
if col.startswith('type.') and col.endswith(' [%]')
Expand Down Expand Up @@ -238,9 +243,15 @@ def __init__(self, **params):
)

def __panel__(self) -> pn.viewable.Viewable:
    """Return the viewable for this plot.

    When the selected column is the special "no plot" value, a zero-height
    spacer is returned in place of the plot; otherwise the reactive plot
    is wrapped in a HoloViews pane.
    """
    no_plot_selected = self.column_name == SpecialColumnEnum.NO_PLOT.value
    if no_plot_selected:
        return pn.Spacer(height=0)

    pane_options = dict(
        theme=self.select_plot_theme_widget,
        # sizing configuration
        height=350,  # TODO: find a better way than fixed height
        sizing_mode='stretch_width',
    )
    return pn.pane.HoloViews(self.plot_commits_rx, **pane_options)


Expand All @@ -252,8 +263,14 @@ def __init__(self, **params):
#print("TimeseriesPlotForAuthor.__init__()")
super().__init__(**params)

self.resampled_df_rx = pn.rx(author_timeline_df_freq)(
resample_by_author_df=self.main_plot.data_store.resampled_timeline_by_author_rx,
author_id=self.author_email,
resample_rate=self.data_store.resample_frequency_widget,
)

self.plot_commits_rx = pn.rx(plot_commits)(
resampled_df=self.main_plot.data_store.resampled_timeline_by_author_rx.loc[self.author_email],
resampled_df=self.resampled_df_rx,
column=self.main_plot.param.column_name.rx(),
from_date_str=self.main_plot.param.from_date_str.rx(),
xlim=self.main_plot.date_range_rx,
Expand All @@ -262,6 +279,9 @@ def __init__(self, **params):

def __panel__(self) -> pn.viewable.Viewable:
#print("TimeseriesPlotForAuthor.__panel__()")
if self.main_plot.column_name == SpecialColumnEnum.NO_PLOT.value:
return pn.Spacer(height=0)

return pn.pane.HoloViews(
self.plot_commits_rx,
theme=self.main_plot.select_plot_theme_widget,
Expand Down
Loading