-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_database.py
76 lines (64 loc) · 2.46 KB
/
convert_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pathlib
import traceback
from typing import Any
import numpy as np
import pandas as pd
import wfdb
from tqdm import tqdm
import ecg_segment
def _row_from_comments(comments: list[str], diagnoses: list[Any]) -> dict[Any, Any]:
    """Build one metadata row from the comment lines of a record header.

    Only comments starting with ``Age``, ``Sex`` or ``Dx`` are used; for each
    of them the text after the first ``:`` is the value.

    Args:
        comments: Comment strings taken from a record header.
        diagnoses: SNOMED-CT codes that have a column in the output table.

    Returns:
        A dict with the keys ``"Age"``, ``"Sex"`` and one key per entry of
        ``diagnoses``. ``"Age"`` is the parsed integer, ``-1`` when the value
        is not an integer, or ``0`` when there is no ``Age`` comment.
        ``"Sex"`` is ``1`` when the value is exactly ``"Female"`` and ``0``
        otherwise. Each diagnosis key is ``1`` when the code is listed in a
        ``Dx`` comment and ``0`` otherwise.

    Raises:
        ValueError: If a ``Dx`` entry is not an integer.
        IndexError: If a used comment contains no ``:``.
    """
    row: dict[Any, Any] = {d: 0 for d in diagnoses}
    row["Age"] = 0
    row["Sex"] = 0
    for comment in comments:
        if comment.startswith("Age"):
            try:
                row["Age"] = int(comment.split(":")[1].strip())
            except ValueError:
                # Non-integer age values are recorded as -1.
                row["Age"] = -1
        elif comment.startswith("Sex"):
            row["Sex"] = int(comment.split(":")[1].strip() == "Female")
        elif comment.startswith("Dx"):
            for code in comment.split(":")[1].strip().split(","):
                code_id = int(code.strip())
                # Assigning to a dict key never raises KeyError, so unknown
                # codes have to be filtered out explicitly; otherwise the row
                # would gain keys that have no column in the output table.
                if code_id in row:
                    row[code_id] = 1
    return row


def main() -> None:
    """Convert every WFDB record found below the working directory.

    Reads the diagnosis codes from the ``Snomed_CT`` column of
    ``ConditionNames_SNOMED-CT.csv``, then walks all ``*.hea`` files under
    ``.`` (skipping paths that contain ``"ipy"``). For each record the
    transposed ``p_signal`` array is saved to ``./npy/<record name>`` with
    ``np.save`` and the age, sex and diagnosis flags from the header comments
    are collected. The collected table is written to ``header_info.csv`` with
    the record names in a ``Filename`` index column.

    The first record that fails to process aborts the run: the error and
    traceback are printed and the function returns without writing
    ``header_info.csv``.
    """
    # Read SNOMED-CT codes
    snomed = pd.read_csv("ConditionNames_SNOMED-CT.csv")["Snomed_CT"]
    diagnoses = snomed.unique().tolist()
    columns = ["Age", "Sex"] + diagnoses

    # Glob all .hea files in .
    files = list(pathlib.Path(".").glob("**/*.hea"))
    npy_dir = pathlib.Path("./npy")
    npy_dir.mkdir(exist_ok=True)

    # Rows are gathered here and turned into a DataFrame once at the end;
    # enlarging a DataFrame one row at a time copies it on every insert.
    # Keying by record name keeps the "last record with a given name wins"
    # behaviour of a label-based row assignment.
    rows: dict[str, dict[Any, Any]] = {}
    for file in tqdm(files):
        try:
            if "ipy" in file.absolute().as_posix():
                continue
            record_name = file.stem
            file_without_ext = file.with_suffix("")
            # Read record and save the transposed signal array to npy
            signal = wfdb.rdrecord(file_without_ext)
            ecg = signal.p_signal.T
            np.save(npy_dir / record_name, ecg)
            # Read header and extract age, sex and diagnosis flags
            header = wfdb.rdheader(file_without_ext)
            rows[record_name] = _row_from_comments(header.comments, diagnoses)
        except Exception as e:
            print(e)
            print("error processing", file)
            traceback.print_exc()
            # NOTE(review): a single bad record stops the whole conversion
            # and discards the collected header info; kept as in the
            # original -- confirm this fail-fast behaviour is intended.
            return

    header_info = pd.DataFrame(
        list(rows.values()), index=list(rows), columns=columns
    )
    header_info.to_csv("header_info.csv", index_label="Filename")
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()