# rnn.py
# -*- coding: utf-8 -*-
"""rnn.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ew-mmvutdj8KoySKwoBxFiXZq7_SH-9V
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
# Load data
# Each CSV is read from the working directory and its timestamp column is
# parsed into pandas datetimes.
scada_df = pd.read_csv('scada_data.csv')
scada_df['DateTime'] = pd.to_datetime(scada_df['DateTime'])

# status_df is loaded and its 'Time' column renamed to 'DateTime', but it is
# not referenced by any later statement visible in this script.
status_df = pd.read_csv('status_data.csv')
status_df['Time'] = pd.to_datetime(status_df['Time'])
status_df.rename(columns={'Time': 'DateTime'}, inplace=True)

# fault_df supplies the 'Fault' label column used as the target below.
fault_df = pd.read_csv('fault_data.csv')
fault_df['DateTime'] = pd.to_datetime(fault_df['DateTime'])

# Merge data
# The outer join on 'Time' keeps unmatched rows from both frames. Rows with
# no fault record get NaN in 'Fault', which is replaced by the 'NF' label
# (presumably "no fault").
df_combine = scada_df.merge(fault_df, on='Time', how='outer')
df_combine['Fault'] = df_combine['Fault'].fillna('NF')

# Both frames carry a 'DateTime' column, so the merge suffixes them as
# DateTime_x / DateTime_y; those and the listed columns are removed so they
# are not used as model inputs.
df_combine = df_combine.drop(columns=[
    'DateTime_x', 'Time', 'Error', 'WEC: ava. windspeed',
    'WEC: ava. available P from wind',
    'WEC: ava. available P technical reasons',
    'WEC: ava. Available P force majeure reasons',
    'WEC: ava. Available P force external reasons',
    'WEC: max. windspeed', 'WEC: min. windspeed',
    'WEC: Operating Hours', 'WEC: Production kWh',
    'WEC: Production minutes', 'DateTime_y',
])
# Balancing classes
# Keep every row whose label is not 'NF' and draw a fixed-seed random sample
# of 300 'NF' rows, then stack the two groups under a fresh 0..n-1 index.
is_nf = df_combine['Fault'] == 'NF'
df_nf = df_combine.loc[is_nf].sample(n=300, random_state=42)
df_f = df_combine.loc[~is_nf]
df_combine = pd.concat([df_nf, df_f], ignore_index=True)
# Prepare data for RNN
# Features are every column except the label; targets are the one-hot
# encoded fault categories.
X = df_combine.drop(columns='Fault').to_numpy()
y = pd.get_dummies(df_combine['Fault']).to_numpy()

# Shuffle features and targets together (fixed seed for repeatability)
X, y = shuffle(X, y, random_state=42)

# Train-test split: 70% train / 30% test
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Normalize inputs: statistics are fitted on the training rows only and then
# applied to both splits.
scaler = StandardScaler().fit(x_train)
X_train = scaler.transform(x_train)
X_test = scaler.transform(x_test)

# Reshape data for RNN input: insert a length-1 middle axis so each row
# becomes a (1, n_features) sequence.
X_train = X_train[:, np.newaxis, :]
X_test = X_test[:, np.newaxis, :]
# Build RNN model
# Input shape and output width are read from the prepared arrays.
_, timesteps, n_features = X_train.shape
n_classes = y_train.shape[1]

model = Sequential([
    SimpleRNN(64, input_shape=(timesteps, n_features)),
    Dense(32, activation='relu'),
    Dense(n_classes, activation='softmax'),  # one probability per fault class
])

# Compile model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train model
# The test split is passed as validation data, so per-epoch validation
# metrics are computed on the same rows that are evaluated afterwards.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=2,
)
# Evaluate model
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the single 'accuracy' metric.
accuracy = model.evaluate(X_test, y_test, verbose=0)[1]
print(f"Accuracy: {accuracy * 100:.2f}%")

# Predictions on test data
# argmax turns softmax probabilities / one-hot rows into class indices.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Generate confusion matrix
# NOTE(review): assumes the one-hot target columns follow this order
# (alphabetical) - confirm against the pd.get_dummies output.
fault_categories = ['AF', 'EF', 'FF', 'GF', 'MF', 'NF']
# Pin the label set so the matrix always has one row/column per category.
# Without `labels`, a class absent from both the true and predicted test
# labels is dropped and the matrix no longer lines up with the tick labels.
conf_matrix = confusion_matrix(
    y_true_classes,
    y_pred_classes,
    labels=np.arange(len(fault_categories)),
)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=fault_categories,
    yticklabels=fault_categories,
)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()