-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathopenqa-powermanagement.py
executable file
·172 lines (151 loc) · 6.04 KB
/
openqa-powermanagement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/python3
# TODO:
# * Add return values checks to avoid crashes/traces (no connection to openQA server, etc.)
# * The host name of machines may not be unique
import argparse
import configparser
import json
import os
import shlex
import subprocess

import requests
# Host names grouped by the state of their workers; filled in further down.
machine_list_idle, machine_list_offline = [], []
machine_list_broken, machine_list_busy = [], []
# Hosts selected for power-on, and the WORKER_CLASS values of scheduled jobs.
machines_to_power_on, jobs_worker_classes = [], []

# Default configuration: "openqa.ini" inside $OPENQA_CONFIG, falling back to
# /etc/openqa when the variable is not set.
config_dir = os.environ.get("OPENQA_CONFIG", "/etc/openqa")
config_file = os.path.join(config_dir, "openqa.ini")
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open, so a missing file simply
# leaves the parser empty.
config.read(config_file)

# Base URL of the openQA server queried by the rest of the script.
openqa_server = "http://localhost"
# Manage cmdline options
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        help="configuration file to use instead of the default openqa.ini",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="only print the power actions instead of running them",
    )
    parser.add_argument("--host", help="base URL of the openQA server to query")
    parser.add_argument(
        "--osd", action="store_true", help="query https://openqa.suse.de"
    )
    parser.add_argument(
        "--o3", action="store_true", help="query https://openqa.opensuse.org"
    )
    args = parser.parse_args()

    if args.config:
        config_file = args.config
        # The default file has already been loaded before the command line
        # was parsed. Load the requested file into a fresh parser, otherwise
        # --config would only change the path that gets printed and never
        # the power commands that are actually used.
        config = configparser.ConfigParser()
        config.read(config_file)

    # Server selection; an explicit --host wins over --osd, which wins over
    # --o3. Without any of them the default server is kept.
    if args.host:
        openqa_server = args.host
    elif args.osd:
        openqa_server = "https://openqa.suse.de"
    elif args.o3:
        openqa_server = "https://openqa.opensuse.org"
# Show the effective settings before any request is sent.
for setting_label, setting_value in (
    ("Using openQA server: ", openqa_server),
    ("Using config file: ", config_file),
):
    print(setting_label + setting_value)
if args.dry_run:
    print("Dry run mode")
# Blank separator line before the processing output.
print("")
# Scheduled/blocked jobs
scheduled_list_file = requests.get(openqa_server + "/tests/list_scheduled_ajax").content
scheduled_list_data = json.loads(scheduled_list_file)
scheduled_jobs = scheduled_list_data["data"]
# One request is sent per job below; the estimate assumes 0.2 s per request.
print(
    "Processing "
    + str(len(scheduled_jobs))
    + " job(s) in scheduled/blocked state... (will take about "
    + str(int(len(scheduled_jobs) * 0.2))
    + " seconds)"
)

# Create list of WORKER_CLASS needed
for job in scheduled_jobs:
    response = requests.get(openqa_server + "/api/v1/jobs/" + str(job["id"]))
    if not response.ok:
        # The job may be gone by the time its details are requested, or the
        # server may answer with an error; skip it instead of crashing on
        # the missing "job" key.
        print(
            "Skipping job "
            + str(job["id"])
            + ": server answered HTTP "
            + str(response.status_code)
        )
        continue
    job_data = json.loads(response.content)
    worker_class = job_data.get("job", {}).get("settings", {}).get("WORKER_CLASS")
    if not worker_class:
        # Without a WORKER_CLASS there is nothing to match workers against.
        print("Skipping job " + str(job["id"]) + ": no WORKER_CLASS in its settings")
        continue
    jobs_worker_classes.append(worker_class)

# Keep each distinct WORKER_CLASS string once, in a stable order.
jobs_worker_classes = sorted(set(jobs_worker_classes))
print(
    "Found "
    + str(len(jobs_worker_classes))
    + " different WORKER_CLASS in scheduled jobs: "
    + str(jobs_worker_classes)
)
# Workers
workers_list_file = requests.get(openqa_server + "/api/v1/workers").content
workers_list_data = json.loads(workers_list_file)

# Create list of hosts which may need to be powered up/down.
# Each known worker status feeds one of the host lists.
status_to_hosts = (
    ("idle", machine_list_idle),
    ("dead", machine_list_offline),  # Looks like 'dead' means 'offline'
    ("running", machine_list_busy),  # Looks like 'running' means 'working'
    ("broken", machine_list_broken),
)
for worker in workers_list_data["workers"]:
    for known_status, hosts in status_to_hosts:
        if worker["status"] == known_status:
            hosts.append(worker["host"])
            break
    else:
        # No entry of the table matched this worker's status.
        print("Unhandle worker status: " + str(worker["status"]))

# Clean-up the lists: one sorted entry per host, whatever its worker count
(
    machine_list_idle,
    machine_list_offline,
    machine_list_broken,
    machine_list_busy,
) = (
    sorted(set(hosts))
    for hosts in (
        machine_list_idle,
        machine_list_offline,
        machine_list_broken,
        machine_list_busy,
    )
)

# A host with at least 1 busy worker counts neither as idle nor as offline
machine_list_idle = [
    host for host in machine_list_idle if host not in machine_list_busy
]
# A host with at least 1 idle worker does not count as offline either
machine_list_offline = [
    host
    for host in machine_list_offline
    if host not in machine_list_busy and host not in machine_list_idle
]
# Print an overview: one line per list with its size and content.
# The entries of the lists are host names.
for listed_hosts, description in (
    (machine_list_idle, " workers listed fully idle: "),
    (machine_list_offline, " workers listed offline/dead: "),
    (machine_list_broken, " workers listed broken: "),
    (machine_list_busy, " workers listed busy: "),
):
    print(str(len(listed_hosts)) + description + str(listed_hosts))
# Get WORKER_CLASS for each worker of each machine (idle and offline) and
# compare it to the WORKER_CLASS required by scheduled/blocked jobs.
# A worker can take a job when every class the job asks for (comma separated)
# is among the classes the worker offers. The same rule is applied to offline
# and idle machines.
for worker in workers_list_data["workers"]:
    host = worker["host"]
    host_is_offline = host in machine_list_offline
    host_is_idle = host in machine_list_idle
    if not (host_is_offline or host_is_idle):
        continue

    # A worker without a WORKER_CLASS property offers no class at all
    # instead of aborting the whole run with a KeyError.
    offered_classes = set(
        ((worker.get("properties") or {}).get("WORKER_CLASS") or "").split(",")
    )
    can_run_scheduled_job = any(
        set(required_classes.split(",")).issubset(offered_classes)
        for required_classes in jobs_worker_classes
    )
    if not can_run_scheduled_job:
        continue

    if host_is_offline:
        # One entry per matching worker is enough; the list is de-duplicated
        # before it is used.
        machines_to_power_on.append(host)
    if host_is_idle:
        # Warning: scheduled (blocked?) job could be run on idle machine!
        print("Warning: scheduled (blocked?) job could be run on idle machine!")
# Power on machines which can run scheduled jobs
for machine in sorted(set(machines_to_power_on)):
    if machine in machine_list_broken:
        print(
            "Removing '"
            + machine
            + "' from the list to power ON since some workers are broken there"
        )
        continue
    if args.dry_run:
        print("Would power ON '" + machine + "' - Dry run mode")
        continue

    # The command is looked up as "<machine>_POWER_ON" in [power_management].
    power_on_command = None
    if "power_management" in config:
        power_on_command = config["power_management"].get(machine + "_POWER_ON")
    if not power_on_command:
        print("Unable to power ON '" + machine + "' - No command for that")
        continue

    print("Powering ON: " + machine)
    try:
        # Split the configured string into an argument list: a plain string
        # is taken as the program name, so a command with arguments could
        # not be started.
        exit_code = subprocess.call(shlex.split(power_on_command))
    except (OSError, ValueError) as error:
        # Program not found / not executable, or unbalanced quotes in the
        # configured command. Keep going with the remaining machines.
        print("Failed to power ON '" + machine + "': " + str(error))
        continue
    if exit_code != 0:
        print(
            "Power ON command for '"
            + machine
            + "' exited with code "
            + str(exit_code)
        )
# Power off machines which are idle or broken (TODO: add a threshold, e.g. idle since more than 15 minutes. Does API provide this information?)
# Build the candidate list first: each host once, in the original order
# (idle hosts, then broken hosts).
machines_to_power_off = []
for machine in machine_list_idle + machine_list_broken:
    if machine in machines_to_power_off:
        # Listed as idle and as broken: handle it only once.
        continue
    if machine in machine_list_busy:
        # Some workers are broken but others are still running jobs there;
        # powering the host off would kill those jobs.
        print(
            "Not powering OFF '"
            + machine
            + "' since some workers are busy there"
        )
        continue
    machines_to_power_off.append(machine)

for machine in machines_to_power_off:
    if args.dry_run:
        print("Would power OFF '" + machine + "' - Dry run mode")
        continue

    # The command is looked up as "<machine>_POWER_OFF" in [power_management].
    power_off_command = None
    if "power_management" in config:
        power_off_command = config["power_management"].get(machine + "_POWER_OFF")
    if not power_off_command:
        print("Unable to power OFF '" + machine + "' - No command for that")
        continue

    print("Powering OFF: " + machine)
    try:
        # Split the configured string into an argument list: a plain string
        # is taken as the program name, so a command with arguments could
        # not be started.
        exit_code = subprocess.call(shlex.split(power_off_command))
    except (OSError, ValueError) as error:
        # Program not found / not executable, or unbalanced quotes in the
        # configured command. Keep going with the remaining machines.
        print("Failed to power OFF '" + machine + "': " + str(error))
        continue
    if exit_code != 0:
        print(
            "Power OFF command for '"
            + machine
            + "' exited with code "
            + str(exit_code)
        )