forked from sfc-gh-nkolur/streamlit-usage-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
405 lines (300 loc) · 17.8 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# Import python packages
import streamlit as st
import pandas as pd
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark.functions import col
from snowflake.snowpark.window import Window
import plotly.express as px
import datetime
#############################################
# FORMATTING
#############################################
#set to wide format
st.set_page_config(layout="wide")
# Write directly to the app
st.title("Snowflake Account Usage App :snowflake:")
st.divider()
#Sub heading info
st.markdown("This app is developed to go off of the account usage schema in your Snowflake account. For detailed information please see the documentation page below.")
st.markdown("https://docs.snowflake.com/en/sql-reference/account-usage#account-usage-views")
#Info bar
st.info('Developed by Nikhil Kolur & Ashish Patel on the Snowflake Sales Engineering Team. This is not a product of Snowflake please use at your own risk.', icon="ℹ️")
st.divider()
#############################################
# DATE FILTER
#############################################
max_date = datetime.datetime.now()
min_date = datetime.datetime.now() - datetime.timedelta(days=365)
this_year = max_date.year
jan_1 = datetime.date(this_year, 1, 1)
dec_31 = datetime.date(this_year, 12, 31)
if 'starting' not in st.session_state:
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=30)
if 'ending' not in st.session_state:
st.session_state.ending = max_date
st.markdown("Enter your desired date range (30 days on initial load):")
#Column for Date Picker Buttons
col1, col2, col3, col4, col5 = st.columns([1,1,1,1,1])
with col1:
if st.button('30 Days'):
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=30)
st.session_state.ending = datetime.datetime.now()
with col2:
if st.button('60 Days'):
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=60)
st.session_state.ending = datetime.datetime.now()
with col3:
if st.button('90 Days'):
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=90)
st.session_state.ending = datetime.datetime.now()
with col4:
if st.button('180 Days'):
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=180)
st.session_state.ending = datetime.datetime.now()
with col5:
if st.button('365 Days'):
st.session_state.starting = datetime.datetime.now() - datetime.timedelta(days=365)
st.session_state.ending = datetime.datetime.now()
#Date Input
date_input_filter = st.date_input(
"",
(st.session_state.starting,st.session_state.ending),
min_date,
max_date,
)
#Start and End Date (s = start, e = end)
s,e = date_input_filter
st.divider()
# Get the current credentials
session = get_active_session()
credits_used_df = session.sql
#############################################
# Cards at Top
#############################################
#Credits Used Tile
credits_used_sql = f"select round(sum(credits_used),0) as total_credits from snowflake.account_usage.metering_history where start_time between '{s}' and '{e}'"
credits_used_df = session.sql(credits_used_sql)
pandas_credits_used_df = credits_used_df.to_pandas()
#Final Value
credits_used_tile = pandas_credits_used_df.iloc[0].values
# Total # of Jobs Executed
num_jobs_sql = f"select count(*) as number_of_jobs from snowflake.account_usage.query_history where start_time between '{s}' and '{e}'"
num_jobs_df = session.sql(num_jobs_sql)
pandas_num_jobs_df = num_jobs_df.to_pandas()
#Final Value
num_jobs_tile = pandas_num_jobs_df.iloc[0].values
# Current Storage
current_storage_sql = f"select round(avg(storage_bytes + stage_bytes + failsafe_bytes) / power(1024, 4),2) as billable_tb from snowflake.account_usage.storage_usage where USAGE_DATE = current_date() -1;"
current_storage_df = session.sql(current_storage_sql)
pandas_current_storage_df = current_storage_df.to_pandas()
#Final Value
current_storage_tile = pandas_current_storage_df.iloc[0].values
#Column formatting and metrics of header 3 metrics
col1, col2, col3 = st.columns(3)
col1.metric("Credits Used","{:,}".format(int(credits_used_tile)))
col2.metric("Total # of Jobs Executed","{:,}".format(int(num_jobs_tile)))
col3.metric("Current Storage (TB)",current_storage_tile)
#############################################
# Credit Usage Total (Bar Chart)
#############################################
#Credits Usage (Total)
total_credits_used_sql = f"select warehouse_name,sum(credits_used) as total_credits_used from snowflake.account_usage.warehouse_metering_history where start_time between '{s}' and '{e}' group by 1 order by 2 desc limit 10 "
total_credits_used_df = session.sql(total_credits_used_sql)
pandas_credits_used_df = total_credits_used_df.to_pandas()
#Convert to a pandas df
pandas_total_credits_used_df = total_credits_used_df.to_pandas()
#Chart
fig_credits_used=px.bar(pandas_credits_used_df,x='TOTAL_CREDITS_USED',y='WAREHOUSE_NAME',orientation='h',title="Credits Used by Warehouse")
fig_credits_used.update_traces(marker_color='green')
#############################################
# Jobs by Warehouse
#############################################
#Jobs by Warehouse Data Setup
jobs_by_warehouse_sql = f"select warehouse_name,count(*) as number_of_jobs from snowflake.account_usage.query_history where start_time between '{s}' and '{e}' group by 1 order by 2 desc limit 10"
jobs_by_warehouse_df = session.sql(jobs_by_warehouse_sql)
#convert to pandas df
pandas_jobs_by_warehouse_df = jobs_by_warehouse_df.to_pandas()
#chart
fig_jobs_by_warehouse=px.bar(pandas_jobs_by_warehouse_df,x='NUMBER_OF_JOBS',y='WAREHOUSE_NAME',orientation='h',title="# of Jobs by Warehouse")
fig_jobs_by_warehouse.update_traces(marker_color='purple')
#############################################
# Execution by Query Type
#############################################
#Average Execution by Query Type
execution_by_qtype = f"select query_type, warehouse_size, avg(execution_time) / 1000 as average_execution_time from snowflake.account_usage.query_history where start_time between '{s}' and '{e}' group by 1, 2 order by 3 desc;"
execution_by_qtype_df = session.sql(execution_by_qtype)
#convert to pandas df
pandas_execution_by_qtype_df = execution_by_qtype_df.to_pandas()
#chart
fig_execution_by_qtype=px.bar(pandas_execution_by_qtype_df,x='AVERAGE_EXECUTION_TIME',y='QUERY_TYPE',orientation='h',title="Average Execution by Query Type")
#st.write(fig_execution_by_qtype)
#############################################
# Container 1: Credits & Jobs
#############################################
container1 = st.container()
with container1:
plot1, plot2,plot3 = st.columns(3)
with plot1:
st.plotly_chart(fig_credits_used, use_container_width=True)
with plot2:
st.plotly_chart(fig_jobs_by_warehouse, use_container_width=True)
with plot3:
st.plotly_chart(fig_execution_by_qtype, use_container_width=True)
#############################################
# Credits Used Overtime
#############################################
#Credits Used Overtime
credits_used_overtime_sql = f"select start_time::date as usage_date, warehouse_name, sum(credits_used) as total_credits_used from snowflake.account_usage.warehouse_metering_history where start_time between '{s}' and '{e}' group by 1,2 order by 2,1"
credits_used_overtime_df = session.sql(credits_used_overtime_sql)
#convert to pandas df
pandas_credits_used_overtime_df = credits_used_overtime_df.to_pandas()
#chart
fig_credits_used_overtime_df=px.bar(pandas_credits_used_overtime_df,x='USAGE_DATE',y='TOTAL_CREDITS_USED',color='WAREHOUSE_NAME',orientation='v',title="Credits Used Overtime")
st.plotly_chart(fig_credits_used_overtime_df, use_container_width=True)
#############################################
# Top 25 Longest Queries (Success)
#############################################
#Top 25 Longest Queries (Success)
longest_queries_sql = f"select query_id,query_text,(execution_time / 60000) as exec_time from snowflake.account_usage.query_history where execution_status = 'SUCCESS' and start_time between '{s}' and '{e}' order by execution_time desc limit 25"
longest_queries_df = session.sql(longest_queries_sql)
#convert to pandas df
pandas_longest_queries_df = longest_queries_df.to_pandas()
#chart
fig_longest_queries=px.bar(pandas_longest_queries_df,x='EXEC_TIME',y='QUERY_TEXT',orientation='h',title="Longest Successful Queries (Top 25) ")
#st.write(fig_longest_queries)
#############################################
# Top 25 Longest Queries (Failed)
#############################################
#Top 25 Longest Queries (Failed)
f_longest_queries_sql = f"select query_id,query_text,(execution_time / 60000) as exec_time from snowflake.account_usage.query_history where execution_status = 'FAIL' and start_time between '{s}' and '{e}' order by execution_time desc limit 25"
f_longest_queries_df = session.sql(longest_queries_sql)
#convert to pandas df
f_pandas_longest_queries_df = longest_queries_df.to_pandas()
#chart
fig_f_longest_queries=px.bar(f_pandas_longest_queries_df,x='EXEC_TIME',y='QUERY_TEXT',orientation='h',title="Longest Failed Queries (Top 25)")
fig_f_longest_queries.update_traces(marker_color='red')
#st.write(fig_f_longest_queries)
#############################################
# Container 2: Query Success/Failure
#############################################
container2 = st.container()
with container2:
plot1, plot2 = st.columns(2)
with plot1:
st.plotly_chart(fig_longest_queries, use_container_width=True)
with plot2:
st.plotly_chart(fig_f_longest_queries, use_container_width=True)
#############################################
# Warehouse Variance overtime
#############################################
warehouse_variance_sql=f"SELECT WAREHOUSE_NAME, DATE(START_TIME) AS DATE, SUM(CREDITS_USED) AS CREDITS_USED, AVG(SUM(CREDITS_USED)) OVER (PARTITION BY WAREHOUSE_NAME ORDER BY DATE ROWS 7 PRECEDING) AS CREDITS_USED_7_DAY_AVG, (TO_NUMERIC(SUM(CREDITS_USED)/CREDITS_USED_7_DAY_AVG*100,10,2)-100)::STRING || '%' AS VARIANCE_TO_7_DAY_AVERAGE FROM SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_METERING_HISTORY where start_time between '{s}' and '{e}' GROUP BY DATE, WAREHOUSE_NAME ORDER BY DATE DESC"
warehouse_variance_df = session.sql(warehouse_variance_sql)
#convert to pandas
pandas_warehouse_variance_df = warehouse_variance_df.to_pandas()
#chart
fig_warehouse_variance_df=px.bar(pandas_warehouse_variance_df,x="DATE",y="VARIANCE_TO_7_DAY_AVERAGE",color ='WAREHOUSE_NAME',orientation='v',title="Warehouse Variance Greater than 7 day Average")
st.plotly_chart(fig_warehouse_variance_df, use_container_width=True)
#############################################
# Total Execution Time by Repeated Queries
#############################################
total_execution_time_sql = f"select query_text, (sum(execution_time) / 60000) as exec_time from snowflake.account_usage.query_history where execution_status = 'SUCCESS' and start_time between '{s}' and '{e}' group by query_text order by exec_time desc limit 10"
total_execution_time_df = session.sql(total_execution_time_sql).to_pandas()
#st.write(total_execution_time_df)
fig_execution_time=px.bar(total_execution_time_df,x='EXEC_TIME',y='QUERY_TEXT', orientation='h',title="Total Execution Time by Repeated Queries")
fig_execution_time.update_traces(marker_color='LightSkyBlue')
st.plotly_chart(fig_execution_time, use_container_width=True)
#############################################
# Credits Billed by Month
#############################################
credits_billed = f"select date_trunc('MONTH', usage_date) as Usage_Month, sum(CREDITS_BILLED) from snowflake.account_usage.metering_daily_history group by Usage_Month"
credits_billed_df = session.sql(credits_billed).to_pandas()
#st.write(credits_billed_df)
fig_credits_billed=px.bar(credits_billed_df,x='USAGE_MONTH',y='SUM(CREDITS_BILLED)', orientation='v',title="Credits Billed by Month")
st.plotly_chart(fig_credits_billed, use_container_width=True)
st.info('The above chart is static and not modified by the date range filter', icon="ℹ️")
#############################################
# Top 10 Average Query Execution Time (By User)
#############################################
query_execution = "select user_name, (avg(execution_time)) / 1000 as average_execution_time from snowflake.account_usage.query_history group by 1 order by 2 desc limit 10"
query_execution_df = session.sql(query_execution).to_pandas()
#st.write(query_execution_df)
fig_cquery_execution=px.bar(query_execution_df,x='USER_NAME',y='AVERAGE_EXECUTION_TIME', orientation='v',title="Average Execution Time per User")
fig_cquery_execution.update_traces(marker_color='MediumPurple')
st.plotly_chart(fig_cquery_execution,use_container_width=True)
#############################################
# GS Utilization by Query Type (Top 10)
#############################################
gs_utilization = "select query_type, sum(credits_used_cloud_services) cs_credits, count(1) num_queries from snowflake.account_usage.query_history where true group by 1 order by 2 desc limit 10"
gs_utilization_df = session.sql(gs_utilization).to_pandas()
#st.write(gs_utilization_df)
fig_gs_utilization=px.bar(gs_utilization_df,x='QUERY_TYPE',y='CS_CREDITS', orientation='v',title="GS Utilization by Query Type (Top 10)")
fig_gs_utilization.update_traces(marker_color='green')
#############################################
# Top 10 Cloud Services by Warehouse
#############################################
compute_gs_by_warehouse = "select warehouse_name, sum(credits_used_cloud_services) CREDITS_USED_CLOUD_SERVICES from snowflake.account_usage.warehouse_metering_history where true group by 1 order by 2 desc limit 10"
compute_gs_by_warehouse_df = session.sql(compute_gs_by_warehouse).to_pandas()
#st.write(compute_gs_by_warehouse_df)
fig_compute_gs_by_warehouse=px.bar(compute_gs_by_warehouse_df,x='WAREHOUSE_NAME',y='CREDITS_USED_CLOUD_SERVICES', orientation='v',title="Compute and Cloud Services by Warehouse", barmode="group")
fig_compute_gs_by_warehouse.update_traces(marker_color='purple')
#############################################
# Container 3: Cloud services
#############################################
container2 = st.container()
with container2:
plot1, plot2 = st.columns(2)
with plot1:
st.plotly_chart(fig_gs_utilization, use_container_width=True)
with plot2:
st.plotly_chart(fig_compute_gs_by_warehouse, use_container_width=True)
#############################################
# Data Storage used Overtime
#############################################
storage_overtime = "select date_trunc(month, usage_date) as usage_month, avg(storage_bytes + stage_bytes + failsafe_bytes) / power(1024, 4) as billable_tb, avg(storage_bytes) / power(1024, 4) as Storage_TB, avg(stage_bytes) / power(1024, 4) as Stage_TB, avg(failsafe_bytes) / power(1024, 4) as Failsafe_TB from snowflake.account_usage.storage_usage group by 1 order by 1"
storage_overtime_df = session.sql(storage_overtime).to_pandas()
fig_storage_overtime=px.bar(storage_overtime_df,x='USAGE_MONTH',y='BILLABLE_TB', orientation='v',title="Data Storage used Overtime", barmode="group")
st.plotly_chart(fig_storage_overtime, use_container_width=True)
st.info('The above chart is static and non modified by the date range filter', icon="ℹ️")
#############################################
# Rows Loaded Overtime (COPY INTO)
#############################################
rows_loaded = f"select to_timestamp(date_trunc(day,last_load_time)) as usage_date, sum(row_count) as total_rows from snowflake.account_usage.load_history where usage_date between '{s}' and '{e}' group by 1 order by usage_date desc"
rows_loaded_df = session.sql(rows_loaded).to_pandas()
fig_rows_loaded=px.line(rows_loaded_df,x='USAGE_DATE',y='TOTAL_ROWS', orientation='v',title="Rows Loaded Overtime (Copy Into)")
st.plotly_chart(fig_rows_loaded, use_container_width=True)
#############################################
# Logins by User
#############################################
logins = "select user_name, sum(iff(is_success = 'NO', 1, 0)) as Failed, count(*) as Success, sum(iff(is_success = 'NO', 1, 0)) / nullif(count(*), 0) as login_failure_rate from snowflake.account_usage.login_history group by 1 order by 4 desc"
logins_df = session.sql(logins).to_pandas()
fig_logins=px.bar(logins_df,x='USER_NAME',y='SUCCESS', orientation='v',title="Logins by User", barmode="group")
fig_logins.update_traces(marker_color='green')
#############################################
# Logins by Client
#############################################
logins_client = "select reported_client_type as Client, user_name, sum(iff(is_success = 'NO', 1, 0)) as Failed, count(*) as Success, sum(iff(is_success = 'NO', 1, 0)) / nullif(count(*), 0) as login_failure_rate from snowflake.account_usage.login_history group by 1, 2 order by 5 desc"
logins_client_df = session.sql(logins_client).to_pandas()
fig_logins_client=px.bar(logins_client_df,x='CLIENT',y='SUCCESS', orientation='v',title="Logins by Client")
fig_logins_client.update_traces(marker_color='purple')
#############################################
# Container Users
#############################################
container_users = st.container()
with container_users:
plot1, plot2 = st.columns(2)
with plot1:
st.plotly_chart(fig_logins, use_container_width=True)
with plot2:
st.plotly_chart(fig_compute_gs_by_warehouse, use_container_width=True)
#############################################
# FOOTER
#############################################
st.divider()
foot1, foot2, foot3 = st.columns([1,1,1])
git_link = ""
with foot1:
st.markdown("Version 1.0")
with foot2:
st.markdown("Github Link")
with foot3:
st.markdown("October 2023")