Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for system metrics #33

Merged
merged 1 commit into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions src/src/config/systemMetrics/systemMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,60 @@
import { systemMetricsDictType } from 'types/utils/formatSystemMetricName';

export const systemMetricsDict: systemMetricsDictType = {
__system__cpu: 'CPU (%)',
__system__p_memory_percent: 'Process Memory (%)',
__system__memory_percent: 'Memory (%)',
__system__disk_percent: 'Disk (%)',
__system__gpu: 'GPU (%)',
__system__gpu_memory_percent: 'GPU Memory (%)',
__system__gpu_power_watts: 'GPU Power (W)',
__system__gpu_temp: 'GPU Temperature (°C)',
'system/cpu_utilization_percentage': 'CPU (%)',
'system/disk_available_megabytes': 'Disk Available (MB)',
'system/disk_usage_megabytes': 'Disk (MB)',
'system/disk_usage_percentage': 'Disk (%)',
'system/gpu_0_memory_usage_megabytes': 'GPU 0 Memory (MB)',

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this be built more cleanly with an iterator?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not an expert in TypeScript, so I don't know 😞
I wanted to make sure we can ship this!

'system/gpu_0_memory_usage_percentage': 'GPU 0 Memory (%)',
'system/gpu_0_utilization_percentage': 'GPU 0 (%)',
'system/gpu_1_memory_usage_megabytes': 'GPU 1 Memory (MB)',
'system/gpu_1_memory_usage_percentage': 'GPU 1 Memory (%)',
'system/gpu_1_utilization_percentage': 'GPU 1 (%)',
'system/gpu_2_memory_usage_megabytes': 'GPU 2 Memory (MB)',
'system/gpu_2_memory_usage_percentage': 'GPU 2 Memory (%)',
'system/gpu_2_utilization_percentage': 'GPU 2 (%)',
'system/gpu_3_memory_usage_megabytes': 'GPU 3 Memory (MB)',
'system/gpu_3_memory_usage_percentage': 'GPU 3 Memory (%)',
'system/gpu_3_utilization_percentage': 'GPU 3 (%)',
'system/gpu_4_memory_usage_megabytes': 'GPU 4 Memory (MB)',
'system/gpu_4_memory_usage_percentage': 'GPU 4 Memory (%)',
'system/gpu_4_utilization_percentage': 'GPU 4 (%)',
'system/gpu_5_memory_usage_megabytes': 'GPU 5 Memory (MB)',
'system/gpu_5_memory_usage_percentage': 'GPU 5 Memory (%)',
'system/gpu_5_utilization_percentage': 'GPU 5 (%)',
'system/gpu_6_memory_usage_megabytes': 'GPU 6 Memory (MB)',
'system/gpu_6_memory_usage_percentage': 'GPU 6 Memory (%)',
'system/gpu_6_utilization_percentage': 'GPU 6 (%)',
'system/gpu_7_memory_usage_megabytes': 'GPU 7 Memory (MB)',
'system/gpu_7_memory_usage_percentage': 'GPU 7 Memory (%)',
'system/gpu_7_utilization_percentage': 'GPU 7 (%)',
'system/gpu_8_memory_usage_megabytes': 'GPU 8 Memory (MB)',
'system/gpu_8_memory_usage_percentage': 'GPU 8 Memory (%)',
'system/gpu_8_utilization_percentage': 'GPU 8 (%)',
'system/gpu_9_memory_usage_megabytes': 'GPU 9 Memory (MB)',
'system/gpu_9_memory_usage_percentage': 'GPU 9 Memory (%)',
'system/gpu_9_utilization_percentage': 'GPU 9 (%)',
'system/gpu_10_memory_usage_megabytes': 'GPU 10 Memory (MB)',
'system/gpu_10_memory_usage_percentage': 'GPU 10 Memory (%)',
'system/gpu_10_utilization_percentage': 'GPU 10 (%)',
'system/gpu_11_memory_usage_megabytes': 'GPU 11 Memory (MB)',
'system/gpu_11_memory_usage_percentage': 'GPU 11 Memory (%)',
'system/gpu_11_utilization_percentage': 'GPU 11 (%)',
'system/gpu_12_memory_usage_megabytes': 'GPU 12 Memory (MB)',
'system/gpu_12_memory_usage_percentage': 'GPU 12 Memory (%)',
'system/gpu_12_utilization_percentage': 'GPU 12 (%)',
'system/gpu_13_memory_usage_megabytes': 'GPU 13 Memory (MB)',
'system/gpu_13_memory_usage_percentage': 'GPU 13 Memory (%)',
'system/gpu_13_utilization_percentage': 'GPU 13 (%)',
'system/gpu_14_memory_usage_megabytes': 'GPU 14 Memory (MB)',
'system/gpu_14_memory_usage_percentage': 'GPU 14 Memory (%)',
'system/gpu_14_utilization_percentage': 'GPU 14 (%)',
'system/gpu_15_memory_usage_megabytes': 'GPU 15 Memory (MB)',
'system/gpu_15_memory_usage_percentage': 'GPU 15 Memory (%)',
'system/gpu_15_utilization_percentage': 'GPU 15 (%)',
'system/network_receive_megabytes': 'Network Rx (MB)',
'system/network_transmit_megabytes': 'Network Tx (MB)',
'system/system_memory_usage_megabytes': 'Memory (MB)',
'system/system_memory_usage_percentage': 'Memory (%)',
};
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ const statisticsInitialMap: Record<string, IProjectStatistic> = {
iconBgColor: '#7A4CE0',
navLink: routes.METRICS.path,
},
// Statistics entry for system metrics. The count starts at 0 here.
systemMetrics: {
label: 'Sys. metrics',
count: 0,
icon: 'metrics',
iconBgColor: '#AF4EAB',
// Link to the metrics route with a `select` query parameter built by encode()
// from an advanced-mode query matching metric names that start with 'system/'.
// NOTE(review): encode() is defined elsewhere — confirm the format it produces.
navLink: `${routes.METRICS.path}?select=${encode({
advancedQuery: "metric.name.startswith('system/')",
advancedMode: true,
})}`,
},
};

const runsCountingInitialMap: Record<'archived' | 'runs', IProjectStatistic> = {
Expand Down Expand Up @@ -59,7 +69,7 @@ function ProjectStatistics() {
let systemMetricsCount = 0;
let sequenceItemsCount = 0;
for (let [itemKey, itemData] of Object.entries(seqData)) {
if (itemKey.startsWith('__system__')) {
if (itemKey.startsWith('system/')) {
systemMetricsCount += itemData.length;
} else {
sequenceItemsCount += itemData.length;
Expand Down
10 changes: 10 additions & 0 deletions src/src/pages/RunDetail/RunDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ const tabs: Record<string, string> = {
overview: 'Overview',
run_parameters: 'Run Params',
metrics: 'Metrics',
system: 'System',
settings: 'Settings',
};

Expand Down Expand Up @@ -117,6 +118,15 @@ function RunDetail(): React.FunctionComponentElement<React.ReactNode> {
isRunBatchLoading={runData?.isRunBatchLoading}
/>
),
system: (
<RunDetailMetricsAndSystemTab
runHash={runHash}
runTraces={runData?.runTraces}
runBatch={runData?.runSystemBatch}
isSystem
isRunBatchLoading={runData?.isRunBatchLoading}
/>
),
settings: (
<RunDetailSettingsTab
isArchived={runData?.runInfo?.archived}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,18 @@ function RunOverviewSidebar({
const insightsList = React.useMemo(() => {
const path = url.split('/').slice(0, -1).join('/');
const systemMetricsLength: number =
traces.metric.filter((m) => m.name.startsWith('__system__')).length || 0;
traces.metric.filter((m) => m.name.startsWith('system/')).length || 0;
return [
{
name: 'Metrics',
path: `${path}/metrics`,
value: traces?.metric?.length - systemMetricsLength || 0,
},
{
name: 'System',
path: `${path}/system`,
value: systemMetricsLength,
},
];
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [traces]);
Expand Down
27 changes: 18 additions & 9 deletions src/src/pages/Runs/components/RunsTableGrid/RunsTableGrid.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import { ITableColumn } from 'types/pages/metrics/components/TableColumns/TableC
import { ITagInfo } from 'types/pages/tags/Tags';

import { formatSystemMetricName } from 'utils/formatSystemMetricName';
import alphabeticalSortComparator from 'utils/alphabeticalSortComparator';
import { getMetricHash } from 'utils/app/getMetricHash';
import { getMetricLabel } from 'utils/app/getMetricLabel';
import { isSystemMetric } from 'utils/isSystemMetric';
Expand Down Expand Up @@ -149,14 +148,24 @@ function getRunsTableColumns(
};
isSystem ? systemMetricsList.push(column) : metricsList.push(column);
});
acc = [
...acc,
...metricsList.sort(alphabeticalSortComparator({ orderBy: 'key' })),
...systemMetricsList.sort(
alphabeticalSortComparator({ orderBy: 'key' }),
),
];
return acc;
// Append the collected metric columns and system-metric columns to the accumulator.
acc = [...acc, ...metricsList, ...systemMetricsList];
// Sort the accumulated columns and return them. Ordering rules:
//   1. columns whose key satisfies isSystemMetric() come before those that do not;
//   2. within the same group, columns are ordered by upper-cased label,
//      i.e. a case-insensitive comparison.
// Note: Array.prototype.sort sorts `acc` in place and returns the same array.
return acc.sort((a: ITableColumn, b: ITableColumn) => {
const aIsSystem = isSystemMetric(a['key']);
const bIsSystem = isSystemMetric(b['key']);
if (aIsSystem && !bIsSystem) {
return -1;
} else if (!aIsSystem && bIsSystem) {
return 1;
}
// Both columns are in the same group: fall back to label comparison.
// The label is asserted to be a string before upper-casing.
const aLabel = (a['label'] as string).toUpperCase();
const bLabel = (b['label'] as string).toUpperCase();
if (aLabel < bLabel) {
return -1;
} else if (aLabel > bLabel) {
return 1;
}
return 0;
});
}, []),
runColumns.map((param) => ({
key: param,
Expand Down
2 changes: 1 addition & 1 deletion src/src/services/models/runs/runDetailAppModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ function processRunBatchData(
}),
sortKey: `${run.name}${contextName}`,
};
if (run.name.startsWith('__system__')) {
if (run.name.startsWith('system/')) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the third time I'm seeing this magic string in this PR - could we extract a constant somehow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is based on the existing UI code from Aim at this stage. Extracting a constant would make total sense, but I wanted to keep this as close as possible to the current upstream logic.

runSystemBatch.push(metric);
} else {
runMetricsBatch.push(metric);
Expand Down
66 changes: 57 additions & 9 deletions src/src/types/utils/formatSystemMetricName.d.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,58 @@
export type systemMetricsDictType = {
__system__cpu: string;
__system__p_memory_percent: string;
__system__memory_percent: string;
__system__disk_percent: string;
__system__gpu: string;
__system__gpu_memory_percent: string;
__system__gpu_power_watts: string;
__system__gpu_temp: string;
};
'system/cpu_utilization_percentage': string;
'system/disk_available_megabytes': string;
'system/disk_usage_megabytes': string;
'system/disk_usage_percentage': string;
'system/gpu_0_memory_usage_megabytes': string;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment re: tidying this up with an iterator...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same reply as above: if there is a way to do this cleanly in TypeScript, I'll take it!

'system/gpu_0_memory_usage_percentage': string;
'system/gpu_0_utilization_percentage': string;
'system/gpu_1_memory_usage_megabytes': string;
'system/gpu_1_memory_usage_percentage': string;
'system/gpu_1_utilization_percentage': string;
'system/gpu_10_memory_usage_megabytes': string;
'system/gpu_10_memory_usage_percentage': string;
'system/gpu_10_utilization_percentage': string;
'system/gpu_11_memory_usage_megabytes': string;
'system/gpu_11_memory_usage_percentage': string;
'system/gpu_11_utilization_percentage': string;
'system/gpu_12_memory_usage_megabytes': string;
'system/gpu_12_memory_usage_percentage': string;
'system/gpu_12_utilization_percentage': string;
'system/gpu_13_memory_usage_megabytes': string;
'system/gpu_13_memory_usage_percentage': string;
'system/gpu_13_utilization_percentage': string;
'system/gpu_14_memory_usage_megabytes': string;
'system/gpu_14_memory_usage_percentage': string;
'system/gpu_14_utilization_percentage': string;
'system/gpu_15_memory_usage_megabytes': string;
'system/gpu_15_memory_usage_percentage': string;
'system/gpu_15_utilization_percentage': string;
'system/gpu_2_memory_usage_megabytes': string;
'system/gpu_2_memory_usage_percentage': string;
'system/gpu_2_utilization_percentage': string;
'system/gpu_3_memory_usage_megabytes': string;
'system/gpu_3_memory_usage_percentage': string;
'system/gpu_3_utilization_percentage': string;
'system/gpu_4_memory_usage_megabytes': string;
'system/gpu_4_memory_usage_percentage': string;
'system/gpu_4_utilization_percentage': string;
'system/gpu_5_memory_usage_megabytes': string;
'system/gpu_5_memory_usage_percentage': string;
'system/gpu_5_utilization_percentage': string;
'system/gpu_6_memory_usage_megabytes': string;
'system/gpu_6_memory_usage_percentage': string;
'system/gpu_6_utilization_percentage': string;
'system/gpu_7_memory_usage_megabytes': string;
'system/gpu_7_memory_usage_percentage': string;
'system/gpu_7_utilization_percentage': string;
'system/gpu_8_memory_usage_megabytes': string;
'system/gpu_8_memory_usage_percentage': string;
'system/gpu_8_utilization_percentage': string;
'system/gpu_9_memory_usage_megabytes': string;
'system/gpu_9_memory_usage_percentage': string;
'system/gpu_9_utilization_percentage': string;
'system/network_receive_megabytes': string;
'system/network_transmit_megabytes': string;
'system/system_memory_usage_megabytes': string;
'system/system_memory_usage_percentage': string;
};