-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_handling.py
92 lines (83 loc) · 4.32 KB
/
data_handling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
""" data_handling.py: Provides resolution table from VMware KBs as machine-readable json files.
VMware KBs provide release information only as a human-readable HTML table.
However, for automation it would be nice to have it in a machine-readable format.
This script takes the tables from a VMware KB page and writes each of them to a JSON file.
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""
# Module metadata (authorship, licensing and release state of this script).
__author__ = "Dominik Zorgnotti"
__contact__ = "[email protected]"
__created__ = "2020-02-26"
__deprecated__ = False
# NOTE(review): __contact__ is already assigned above with the same value;
# this second assignment is redundant.
__contact__ = "[email protected]"
__license__ = "GPLv3"
__status__ = "beta"
__version__ = "0.3.0"
import os
def create_json_output(kb_dataobject, output_base_dir: str, record_type: str):
    """Write every table of a KB object to its own JSON file.

    The files are written to ``<output_base_dir>/<record_type>/``; the
    directory is created when it does not exist yet.

    Args:
        kb_dataobject: KB object providing ``id``, ``fmt_product``,
            ``list_of_dframes`` (iterable of dataframes) and
            ``list_of_merged_frames`` (mapping of table name to dataframe,
            may be empty).
        output_base_dir: Base directory for the output (may be relative).
        record_type: Passed to ``DataFrame.to_json`` as ``orient`` and used
            as the name of the output sub-directory. For ``"index"`` the
            tables are re-indexed before writing: by "Build Number" when that
            column exists, otherwise by "VxRail Release" for KB 52075.

    A ``ValueError`` raised while serialising a table is printed and the
    table is skipped; the remaining tables are still written.
    """
    outputdir = os.path.join(output_base_dir, record_type)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outputdir, exist_ok=True)

    file_prefix = f"kb{kb_dataobject.id}_{kb_dataobject.fmt_product}"
    by_index = record_type == "index"

    for table_id, dataframe in enumerate(kb_dataobject.list_of_dframes):
        filename = f"{file_prefix}_table{table_id}_release_as-{record_type}.json"
        # NOTE(review): the transform helpers modify the dataframe held by
        # kb_dataobject in place, so a later call with another record_type
        # sees the re-indexed frame - confirm this is intended by the caller.
        if by_index and "Build Number" in dataframe.columns:
            dataframe = transform_index(dataframe)
        # VxRail handling: this KB has no "Build Number" column.
        elif by_index and kb_dataobject.id == 52075:
            dataframe = transform_kb52075_index(dataframe)
        _write_json_table(
            dataframe,
            os.path.join(outputdir, filename),
            record_type,
            kb_dataobject.id,
            f"table {table_id}",
        )

    if kb_dataobject.list_of_merged_frames:
        for table_name, dataframe in kb_dataobject.list_of_merged_frames.items():
            filename = f"{file_prefix}_{table_name}_as-{record_type}.json"
            if by_index and "Build Number" in dataframe.columns:
                dataframe = transform_index(dataframe)
            # Merged tables are identified by name, so report the name on
            # errors (a numeric counter was never advanced for them).
            _write_json_table(
                dataframe,
                os.path.join(outputdir, filename),
                record_type,
                kb_dataobject.id,
                f"merged table {table_name}",
            )


def _write_json_table(dataframe, target_path, record_type, kb_id, table_label):
    """Serialise one dataframe to ``target_path``; print a ValueError instead of raising it."""
    try:
        dataframe.to_json(
            target_path,
            indent=4, orient=record_type, date_format="iso"
        )
    except ValueError as err:
        print(f"{kb_id}: Error for json {record_type} in {table_label}: {err}")
def transform_index(dataframe):
    """Re-create the index of a dataframe based on its "Build Number" column.

    Destructive: the dataframe is modified in place. Every row whose build
    number occurs more than once is removed (``keep=False`` drops all copies,
    not just the repeats), and the "Build Number" column becomes the index.

    Args:
        dataframe: Dataframe containing a "Build Number" column.

    Returns:
        The same dataframe object, now indexed by "Build Number".
    """
    dataframe.drop_duplicates(subset="Build Number", keep=False, inplace=True)
    # No reset_index() needed here: set_index() replaces the index anyway
    # (the former non-inplace reset_index() call discarded its result).
    dataframe.set_index("Build Number", inplace=True)
    return dataframe
def transform_kb52075_index(dataframe):
    """Re-create the index of the VxRail dataframe based on its "VxRail Release" column.

    Destructive: the dataframe is modified in place. Every row whose release
    occurs more than once is removed (``keep=False`` drops all copies, not
    just the repeats), and the "VxRail Release" column becomes the index.

    Args:
        dataframe: Dataframe containing a "VxRail Release" column.

    Returns:
        The same dataframe object, now indexed by "VxRail Release".
    """
    dataframe.drop_duplicates(subset="VxRail Release", keep=False, inplace=True)
    # No reset_index() needed here: set_index() replaces the index anyway
    # (the former non-inplace reset_index() call discarded its result).
    dataframe.set_index("VxRail Release", inplace=True)
    return dataframe
def standardize_columns(dataframe):
    """Rename known column-name variants to the common spelling.

    "BuildNumber" and "Build number" become "Build Number"; "ReleaseDate"
    becomes "Release Date". The dataframe is changed in place and returned.
    """
    aliases = {
        "BuildNumber": "Build Number",
        "Build number": "Build Number",
        "ReleaseDate": "Release Date",
    }
    # Only rename the variants that are actually present in this dataframe.
    present = {
        variant: standard
        for variant, standard in aliases.items()
        if variant in dataframe.columns
    }
    if present:
        dataframe.rename(columns=present, inplace=True)
    return dataframe