-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathn_cmapss_example_data_loading_and_exploration.py
324 lines (259 loc) · 12.3 KB
/
n_cmapss_example_data_loading_and_exploration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# -*- coding: utf-8 -*-
"""N-CMAPSS_Example_data_loading_and_exploration.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1lm1HSNftoCFAtv41NKPdXQtlZM7uSTei
# Dataset Exploration - N-CMAPSS DS02
The new C-MAPSS dataset DS02 from NASA provides degradation trajectories of 9 turbofan engines with unknown and different initial health condition for complete flights and two failure modes (HPT efficiency degradation & HPT efficiency degradation combined with LPT efficiency and capacity degradation). The data were synthetically generated with the Commercial Modular Aero-Propulsion System Simulation (C-MAPSS) dynamical model. The data contains multivariate sensors readings of the complete run-to-failure trajectories. Therefore, the records stop at the cycle/time the engine failed. A total number of 6.5M time stamps are available.
Copyright (c) by Manuel Arias.
"""
# Commented out IPython magic to ensure Python compatibility.
import os
import h5py
import time
import matplotlib
import numpy as np
import pandas as pd
# import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec
# %matplotlib inline
### Set-up - Define file location
filename = 'N-CMAPSS_DS01-005.h5'
"""#### Read Raw Data"""
# Time tracking, Operation time (min): 0.003
t = time.process_time()
# Load data
with h5py.File(filename, 'r') as hdf:
# Development set
W_dev = np.array(hdf.get('W_dev')) # W
X_s_dev = np.array(hdf.get('X_s_dev')) # X_s
X_v_dev = np.array(hdf.get('X_v_dev')) # X_v
T_dev = np.array(hdf.get('T_dev')) # T
Y_dev = np.array(hdf.get('Y_dev')) # RUL
A_dev = np.array(hdf.get('A_dev')) # Auxiliary
# Test set
W_test = np.array(hdf.get('W_test')) # W
X_s_test = np.array(hdf.get('X_s_test')) # X_s
X_v_test = np.array(hdf.get('X_v_test')) # X_v
T_test = np.array(hdf.get('T_test')) # T
Y_test = np.array(hdf.get('Y_test')) # RUL
A_test = np.array(hdf.get('A_test')) # Auxiliary
# Varnams
W_var = np.array(hdf.get('W_var'))
X_s_var = np.array(hdf.get('X_s_var'))
X_v_var = np.array(hdf.get('X_v_var'))
T_var = np.array(hdf.get('T_var'))
A_var = np.array(hdf.get('A_var'))
# from np.array to list dtype U4/U5
W_var = list(np.array(W_var, dtype='U20'))
X_s_var = list(np.array(X_s_var, dtype='U20'))
X_v_var = list(np.array(X_v_var, dtype='U20'))
T_var = list(np.array(T_var, dtype='U20'))
A_var = list(np.array(A_var, dtype='U20'))
W = np.concatenate((W_dev, W_test), axis=0)
X_s = np.concatenate((X_s_dev, X_s_test), axis=0)
X_v = np.concatenate((X_v_dev, X_v_test), axis=0)
T = np.concatenate((T_dev, T_test), axis=0)
Y = np.concatenate((Y_dev, Y_test), axis=0)
A = np.concatenate((A_dev, A_test), axis=0)
print('')
print("Operation time (min): " , (time.process_time()-t)/60)
print('')
print ("W shape: " + str(W.shape))
print ("X_s shape: " + str(X_s.shape))
print ("X_v shape: " + str(X_v.shape))
print ("T shape: " + str(T.shape))
print ("A shape: " + str(A.shape))
"""## Auxiliary Information ($A$)"""
df_A = DataFrame(data=A, columns=A_var)
df_A.describe()
"""### Units Ids"""
print('Engine units in df: ', np.unique(df_A['unit']))
"""### Flight Classes
The units are divided into three flight classes depending on whether the unit is operating short-length flights (i.e., flight class 1), medium-length flights (i.e., flight class 2), or long-length flights (i.e., flight class 2). A number of real flight conditions are available within each of the flight classes.
| Flight Class | Flight Length [h]
| :-----------: | :-----------:
| 1 | 1 to 3
| 2 | 3 to 5
| 3 | 5 to 7
"""
labelsize = 17
plt.plot(df_A.unit, df_A.Fc, 'o')
plt.tick_params(axis='x', labelsize=labelsize )
plt.tick_params(axis='y', labelsize=labelsize )
plt.xlabel('Unit # [-]', fontsize=labelsize)
plt.ylabel('Flight Class # [-]', fontsize=labelsize )
"""### End Of Failure ($t_{\text{EOF}}$)
The run to failure operation take a different number of cycles for each unit. Below we report the total number of cycles for each unit.
"""
for i in np.unique(df_A['unit']):
print('Unit: ' + str(i) + ' - Number of flight cyles (t_{EOF}): ', len(np.unique(df_A.loc[df_A['unit'] == i, 'cycle'])))
#np.unique(df_A.loc[df_A['unit'] == i, 'cycle'])
"""## Operative Conditions ($w$)
DASHlink- Flight Data For Tail 687.(2012). Retrieved on 2019-01-29 from https://c3.nasa.gov/dashlink/
"""
df_W = DataFrame(data=W, columns=W_var)
df_W['unit'] = df_A['unit'].values
def plot_df_single_color(data, variables, labels, size=12, labelsize=17, name=None):
"""
"""
plt.clf()
input_dim = len(variables)
cols = min(np.floor(input_dim**0.5).astype(int),4)
rows = (np.ceil(input_dim / cols)).astype(int)
gs = gridspec.GridSpec(rows, cols)
fig = plt.figure(figsize=(size,max(size,rows*2)))
for n in range(input_dim):
ax = fig.add_subplot(gs[n])
ax.plot(data[variables[n]], marker='.', markerfacecolor='none', alpha = 0.7)
ax.tick_params(axis='x', labelsize=labelsize)
ax.tick_params(axis='y', labelsize=labelsize)
plt.ylabel(labels[n], fontsize=labelsize)
plt.xlabel('Time [s]', fontsize=labelsize)
plt.tight_layout()
if name is not None:
plt.savefig(name, format='png', dpi=300)
plt.show()
plt.close()
def plot_df_color_per_unit(data, variables, labels, size=7, labelsize=17, option='Time', name=None):
"""
"""
plt.clf()
input_dim = len(variables)
cols = min(np.floor(input_dim**0.5).astype(int),4)
rows = (np.ceil(input_dim / cols)).astype(int)
gs = gridspec.GridSpec(rows, cols)
leg = []
fig = plt.figure(figsize=(size,max(size,rows*2)))
color_dic_unit = {'Unit 1': 'C0', 'Unit 2': 'C1', 'Unit 3': 'C2', 'Unit 4': 'C3', 'Unit 5': 'C4', 'Unit 6': 'C5',
'Unit 7': 'C6', 'Unit 8': 'C7', 'Unit 9': 'C8', 'Unit 10': 'C9', 'Unit 11': 'C10',
'Unit 12': 'C11', 'Unit 13': 'C12', 'Unit 14': 'C13', 'Unit 15': 'C14', 'Unit 16': 'C15',
'Unit 17': 'C16', 'Unit 18': 'C17', 'Unit 19': 'C18', 'Unit 20': 'C19'}
unit_sel = np.unique(data['unit'])
for n in range(input_dim):
ax = fig.add_subplot(gs[n])
for j in unit_sel:
data_unit = data.loc[data['unit'] == j]
if option=='cycle':
time_s = data.loc[data['unit'] == j, 'cycle']
label_x = 'Time [cycle]'
else:
time_s = np.arange(len(data_unit))
label_x = 'Time [s]'
ax.plot(time_s, data_unit[variables[n]], '-o', color=color_dic_unit['Unit ' + str(int(j))],
alpha=0.7, markersize=5)
ax.tick_params(axis='x', labelsize=labelsize)
ax.tick_params(axis='y', labelsize=labelsize)
leg.append('Unit '+str(int(j)))
plt.ylabel(labels[n], fontsize=labelsize)
plt.xlabel(label_x, fontsize=labelsize)
ax.get_xaxis().set_major_formatter(
matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
if n==0:
ax.get_yaxis().set_major_formatter(
matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.legend(leg, loc='best', fontsize=labelsize-2) #lower left
plt.tight_layout()
if name is not None:
plt.savefig(name, format='png', dpi=300)
plt.show()
plt.close()
"""### Flight Traces"""
df_W_u = df_W.loc[(df_A.unit == 5) & (df_A.cycle == 1)]
df_W_u.reset_index(inplace=True, drop=True)
labels = ['Altitude [ft]', 'Mach Number [-]', 'Throttle Resolver Angle [%]', 'Temperature at fan inlet (T2) [°R]']
plot_df_color_per_unit(df_W_u, W_var , labels, size=12, labelsize=19, name='flight_profile_DS02.png')
"""### Flight envelope"""
labelsize = 17
x = np.array([0.0, 0.2, 0.4, 0.6, 0.8])
u = np.array([1.7, 1.7, 4.0, 4.0, 4.0])*10000
l = np.array([0.0, 0.0, 0.0, 0.0, 1.0])*10000
plt.figure(figsize=(7,5))
plt.fill_between(x, l, u, alpha=0.2)
plt.plot(df_W.loc[df_A['Fc'] == 3, 'Mach'], df_W.loc[df_A['Fc'] == 3, 'alt'], '.', alpha=0.9)
plt.plot(df_W.loc[df_A['Fc'] == 2, 'Mach'], df_W.loc[df_A['Fc'] == 2, 'alt'], '.', alpha=0.9)
plt.plot(df_W.loc[df_A['Fc'] == 1, 'Mach'], df_W.loc[df_A['Fc'] == 1, 'alt'], '.', alpha=0.9)
plt.tick_params(axis='x', labelsize=labelsize )
plt.tick_params(axis='y', labelsize=labelsize )
plt.xlim((0.0, 0.8))
plt.ylim((0, 40000))
plt.xlabel('Mach Number - [-]', fontsize=labelsize)
plt.ylabel('Flight Altitude - [ft]', fontsize=labelsize)
"""### Histogram of Flight Conditions"""
def plot_kde(leg, variables, labels, size, units, df_W, df_A, labelsize=17, name=None):
"""
"""
plt.clf()
input_dim = len(variables)
cols = min(np.floor(input_dim**0.5).astype(int),4)
rows = (np.ceil(input_dim / cols)).astype(int)
gs = gridspec.GridSpec(rows, cols)
color_dic_unit = {'Unit 1': 'C0', 'Unit 2': 'C1', 'Unit 3': 'C2', 'Unit 4': 'C3', 'Unit 5': 'C4', 'Unit 6': 'C5',
'Unit 7': 'C6', 'Unit 8': 'C7', 'Unit 9': 'C8', 'Unit 10': 'C9', 'Unit 11': 'C10',
'Unit 12': 'C11', 'Unit 13': 'C12', 'Unit 14': 'C13', 'Unit 15': 'C14', 'Unit 16': 'C15',
'Unit 17': 'C16', 'Unit 18': 'C17', 'Unit 19': 'C18', 'Unit 20': 'C19'}
fig = plt.figure(figsize=(size,max(size,rows*2)))
for n in range(input_dim):
ax = fig.add_subplot(gs[n])
for k, elem in enumerate(units):
sns.kdeplot(df_W.loc[df_A['unit'] == elem, variables[n]],
color=color_dic_unit[leg[k]], shade=True, gridsize=100)
ax.tick_params(axis='x', labelsize=labelsize)
ax.tick_params(axis='y', labelsize=labelsize)
ax.get_xaxis().set_major_formatter(
matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
plt.xlabel(labels[n], fontsize=labelsize)
plt.ylabel('Density [-]', fontsize=labelsize)
if n==0:
plt.legend(leg, fontsize=labelsize-4, loc=0)
else:
plt.legend(leg, fontsize=labelsize-4, loc=2)
plt.tight_layout()
if name is not None:
plt.savefig(name, format='png', dpi=300)
plt.show()
plt.close()
variables = ['alt', 'Mach', 'TRA', 'T2']
labels = ['Altitude [ft]', 'Mach Number [-]', 'Throttle Resolver Angle [%]', 'Temperature at fan inlet [°R]']
size = 10
units = list(np.unique(df_A['unit']))
leg = ['Unit ' + str(int(u)) for u in units]
plot_kde(leg, variables, labels, size, units, df_W, df_A, labelsize=19, name='kde_DS02.png')
"""## Degradation ($\theta$)"""
df_T = DataFrame(data=T, columns=T_var)
df_T['unit'] = df_A['unit'].values
df_T['cycle'] = df_A['cycle'].values
df_Ts = df_T.drop_duplicates()
df_Ts.describe()
import plotly.express as px
varsel = ['unit', 'HPT_eff_mod', 'LPT_eff_mod', 'LPT_flow_mod']
df_Tss = df_Ts.loc[:,varsel]
fig = px.parallel_coordinates(df_Tss, color="unit", labels={"unit": "Units",
"HPT_eff_mod": "HPT_eff_mod", "LPT_eff_mod": "LPT_eff_mod",
"LPT_flow_mod": "LPT_flow_mod", },
color_continuous_scale=px.colors.diverging.Tealrose,
color_continuous_midpoint=2)
fig.show()
labels = T_var
plot_df_single_color(df_T, T_var , labels)
plot_df_color_per_unit(df_Ts, ['HPT_eff_mod'], [r'HPT Eff. - $\theta$ [-]'], size=7, option='cycle')
"""## Sensor readings ($X_s$)"""
df_X_s = DataFrame(data=X_s, columns=X_s_var)
"""### Single Unit"""
df_X_s_u = df_X_s.loc[df_A.unit == 2]
df_X_s_u.reset_index(inplace=True, drop=True)
labels = X_s_var
plot_df_single_color(df_X_s_u, X_s_var, labels)
"""### Single Flight Cycle"""
df_X_s_u_c = df_X_s.loc[(df_A.unit == 2) & (df_A.cycle == 1)]
df_X_s_u_c.reset_index(inplace=True, drop=True)
plot_df_single_color(df_X_s_u_c, X_s_var, X_s_var)
"""## Virtual Sensors ($X_v$)"""
df_X_v = DataFrame(data=X_v, columns=X_v_var)
df_X_v_u_c = df_X_v.loc[(df_A.unit == 2) & (df_A.cycle == 1)]
df_X_v_u_c.reset_index(inplace=True, drop=True)
plot_df_single_color(df_X_v_u_c, X_v_var, X_v_var)
"""## Health state ($h_s$)"""
plot_df_color_per_unit(df_A, ['hs'], [r'$h_s$ [-]'], option='cycle')