-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpostgres-window_functions.sql
201 lines (164 loc) · 5.38 KB
/
postgres-window_functions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
-- window functions & GROUPING SETS
-- https://www.postgresql.org/docs/devel/static/queries-table-expressions.html#QUERIES-GROUPING-SETS
-- https://www.postgresql.org/docs/current/static/tutorial-window.html
-- https://www.postgresql.org/docs/current/static/functions-window.html
CREATE SCHEMA scores;
SET SEARCH_PATH TO 'scores';
create table scores (
id INT,
name VARCHAR(50),
score INT
);
insert into scores (id, name, score) values
(1, 'Sara Alvarez', 62),
(2, 'Laura Ryan', 63),
(3, 'Clarence Mcdonald', 76),
(4, 'Gary Gardner', 71),
(5, 'Aaron Williamson', 61),
(6, 'Roger Martin', 60),
(7, 'William Peterson', 62),
(8, 'Beverly Hamilton', 57),
(9, 'Ashley Watkins', 58),
(10, 'Eugene Gardner', 69);
SELECT * FROM scores;
-- I want to rank students according to their score
-- first without window function
SELECT s1.name,
s1.score as score,
(SELECT count(DISTINCT s2.score) FROM scores s2 WHERE s2.score >= s1.score) AS rank
FROM scores s1
ORDER BY 2, 1
-- with window functions
SELECT s.name, s.score, DENSE_RANK() OVER (ORDER BY s.score DESC)
FROM scores s
ORDER BY 2,1
-- let's try simple RANK
SELECT s.name, s.score, RANK() OVER (ORDER BY s.score DESC)
FROM scores s
ORDER BY 3
CREATE SCHEMA employees;
SET SEARCH_PATH TO 'employees';
CREATE TABLE employees (
id serial PRIMARY KEY,
name varchar(50),
gender char,
salary bigint,
department_id bigint
);
CREATE TABLE departments (
id serial PRIMARY KEY,
name varchar(30)
);
INSERT INTO departments(name) VALUES
('IT'),
('Sales'),
('Marketing');
INSERT INTO employees(name, gender, salary, department_id) values
('Jimmy Gonzalez', 'M', 782,1),
('Peter Anderson', 'M', 750,1),
('Justin Bennett', 'M', 728,1),
('Brandon Little', 'M', 728,1),
('Donna Andrews', 'F', 715,1),
('Jerry Harper', 'M', 612,2),
('Judy Fox', 'F', 707,2),
('Paula Murphy', 'F', 759,2),
('Rachel Murphy', 'F', 719,2),
('Joshua Smith', 'M', 750, 3),
('Nancy Mason', 'F', 783, 3);
-- I want top three salaries for each department
-- first without window functions, omg, that's awful
SELECT d.name as department, e3.name as employee, e3.salary
FROM employees e3
JOIN (
SELECT DISTINCT e.department_id, e.salary
FROM employees e
WHERE (
SELECT COUNT(*)
FROM (SELECT DISTINCT department_id, salary FROM employees) e2
WHERE e2.department_id = e.department_id AND e2.salary > e.salary
) < 3
) tmp ON tmp.department_id = e3.department_id AND tmp.salary = e3.salary
JOIN departments d ON e3.department_id = d.id
ORDER BY 1, 3 DESC, 2 ASC
-- now with window functions
SELECT tmp.name wfdepartment, tmp.empl wfemployee, tmp.salary wfsalary
FROM (
SELECT d.name, e.name empl, e.salary, DENSE_RANK() OVER (PARTITION BY d.id ORDER BY e.salary DESC) as rank
FROM departments d
JOIN employees e ON e.department_id = d.id
) tmp
WHERE rank <= 4
ORDER BY 1,3 DESC, 2 ASC;
CREATE SCHEMA window_functions;
SET SEARCH_PATH TO 'window_functions';
SELECT * FROM generate_series(1,3);
-- just to better understand what order by does
select x, array_agg(x) over ()
from generate_series(1, 3) as t(x);
select x, array_agg(x) over (order by x)
from generate_series(1, 3) as t(x);
-- default frame
select x,
array_agg(x) over (order by x
rows between unbounded preceding
and current row)
from generate_series(1, 3) as t(x);
-- look forward
select x,
array_agg(x) over (rows between current row
and unbounded following)
from generate_series(1, 3) as t(x);
-- whole partition
select x,
array_agg(x) over (rows between unbounded preceding
and unbounded following)
from generate_series(1, 3) as t(x);
-- use case: sum column and current row ratio in a single query
select x,
array_agg(x) over () as frame,
sum(x) over () as sum,
x::float/sum(x) over () as part
from generate_series(1, 3) as t(x);
-- running total
select x,
array_agg(x) over () as frame,
sum(x) over (order by x) as sum,
x::float/sum(x) over () as part
from generate_series(1, 3) as t(x);
DROP TABLE p;
CREATE TABLE p as
select a.date::date as date,
1 + floor(b.x * random()) as x
from generate_series(date 'yesterday', date 'tomorrow', '1 day') as a(date),
generate_series(1, 3) as b(x);
SELECT * FROM p;
SELECT date, x,
count(x) over (partition by date, x),
array_agg(x) over(partition by date, x),
count(x) over (partition by date),
array_agg(x) over(partition by date)
FROM p;
-- dalsie window functions
select x,
row_number() over(),
ntile(4) over w,
lag(x, 2) over w,
lead(x, 1) over w
from generate_series(1, 15, 2) as t(x)
window w as (order by x);
SELECT d.name, e.gender, avg(salary)
FROM employees e
JOIN departments d ON e.department_id = d.id
GROUP BY GROUPING SETS ((d.name), (d.name, e.gender), ());
SELECT d.name, e.gender, avg(salary)
FROM employees e
JOIN departments d ON e.department_id = d.id
GROUP BY ROLLUP (d.name, e.gender);
SELECT d.name, e.gender, avg(salary)
FROM employees e
JOIN departments d ON e.department_id = d.id
GROUP BY GROUPING SETS ((d.name), (e.gender), (d.name, e.gender), ());
SELECT d.name, e.gender, avg(salary)
FROM employees e
JOIN departments d ON e.department_id = d.id
GROUP BY CUBE (d.name, e.gender);