From 4e44569e171418bb62e3b621090aed9e841a1c0f Mon Sep 17 00:00:00 2001
From: ni79ls <35937077+ni79ls@users.noreply.github.com>
Date: Tue, 4 Sep 2018 15:53:53 +0200
Subject: [PATCH] Add files via upload

---
 ...903_Keras_HAR_WISDM_CNN_v1.0_for_medium.py | 408 ++++++++++++++++++
 1 file changed, 408 insertions(+)
 create mode 100644 20180903_Keras_HAR_WISDM_CNN_v1.0_for_medium.py

diff --git a/20180903_Keras_HAR_WISDM_CNN_v1.0_for_medium.py b/20180903_Keras_HAR_WISDM_CNN_v1.0_for_medium.py
new file mode 100644
index 0000000..a0433c5
--- /dev/null
+++ b/20180903_Keras_HAR_WISDM_CNN_v1.0_for_medium.py
@@ -0,0 +1,408 @@
+# Compatibility layer between Python 2 and Python 3
+from __future__ import print_function
+from matplotlib import pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from scipy import stats
+
+from sklearn import metrics
+from sklearn.metrics import classification_report
+from sklearn import preprocessing
+
+import keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Flatten, Reshape, GlobalAveragePooling1D
+from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
+from keras.utils import np_utils
+
+# %%
+
+def feature_normalize(dataset):
+
+    mu = np.mean(dataset, axis=0)
+    sigma = np.std(dataset, axis=0)
+    return (dataset - mu)/sigma
+
+
+def show_confusion_matrix(validations, predictions):
+
+    matrix = metrics.confusion_matrix(validations, predictions)
+    plt.figure(figsize=(6, 4))
+    sns.heatmap(matrix,
+                cmap="coolwarm",
+                linecolor='white',
+                linewidths=1,
+                xticklabels=LABELS,
+                yticklabels=LABELS,
+                annot=True,
+                fmt="d")
+    plt.title("Confusion Matrix")
+    plt.ylabel("True Label")
+    plt.xlabel("Predicted Label")
+    plt.show()
+
+
+def show_basic_dataframe_info(dataframe,
+                              preview_rows=20):
+
+    """
+    This function shows basic information for the given dataframe
+
+    Args:
+        dataframe: A Pandas DataFrame expected to contain data
+        preview_rows: An integer value of how many rows to preview
+
+    Returns:
+        Nothing
+    """
+
+    # Shape and how many rows and columns
+    print("Number of columns in the dataframe: %i" % (dataframe.shape[1]))
+    print("Number of rows in the dataframe: %i\n" % (dataframe.shape[0]))
+    print("First 20 rows of the dataframe:\n")
+    # Show first 20 rows
+    print(dataframe.head(preview_rows))
+    print("\nDescription of dataframe:\n")
+    # Describe dataset like mean, min, max, etc.
+    # print(dataframe.describe())
+
+
+def read_data(file_path):
+
+    """
+    This function reads the accelerometer data from a file
+
+    Args:
+        file_path: URL pointing to the CSV file
+
+    Returns:
+        A pandas dataframe
+    """
+
+    column_names = ['user-id',
+                    'activity',
+                    'timestamp',
+                    'x-axis',
+                    'y-axis',
+                    'z-axis']
+    df = pd.read_csv(file_path,
+                     header=None,
+                     names=column_names)
+    # Last column has a ";" character which must be removed ...
+    df['z-axis'].replace(regex=True,
+      inplace=True,
+      to_replace=r';',
+      value=r'')
+    # ... and then this column must be transformed to float explicitly
+    df['z-axis'] = df['z-axis'].apply(convert_to_float)
+    # This is very important otherwise the model will not fit and loss
+    # will show up as NAN
+    df.dropna(axis=0, how='any', inplace=True)
+
+    return df
+
+
+def convert_to_float(x):
+
+    try:
+        return np.float(x)
+    except:
+        return np.nan
+
+
+# Not used right now
+def feature_normalize(dataset):
+
+    mu = np.mean(dataset, axis=0)
+    sigma = np.std(dataset, axis=0)
+    return (dataset - mu)/sigma
+
+
+def plot_axis(ax, x, y, title):
+
+    ax.plot(x, y)
+    ax.set_title(title)
+    ax.xaxis.set_visible(False)
+    ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
+    ax.set_xlim([min(x), max(x)])
+    ax.grid(True)
+
+
+def plot_activity(activity, data):
+
+    fig, (ax0, ax1, ax2) = plt.subplots(nrows=3,
+         figsize=(15, 10),
+         sharex=True)
+    plot_axis(ax0, data['timestamp'], data['x-axis'], 'x-axis')
+    plot_axis(ax1, data['timestamp'], data['y-axis'], 'y-axis')
+    plot_axis(ax2, data['timestamp'], data['z-axis'], 'z-axis')
+    plt.subplots_adjust(hspace=0.2)
+    fig.suptitle(activity)
+    plt.subplots_adjust(top=0.90)
+    plt.show()
+
+
+def create_segments_and_labels(df, time_steps, step, label_name):
+
+    """
+    This function receives a dataframe and returns the reshaped segments
+    of x,y,z acceleration as well as the corresponding labels
+
+    Args:
+        df: Dataframe in the expected format
+        time_steps: Integer value of the length of a segment that is created
+    Returns:
+        reshaped_segments
+        labels:
+    """
+
+    # x, y, z acceleration as features
+    N_FEATURES = 3
+    # Number of steps to advance in each iteration (for me, it should always
+    # be equal to the time_steps in order to have no overlap between segments)
+    # step = time_steps
+    segments = []
+    labels = []
+    for i in range(0, len(df) - time_steps, step):
+        xs = df['x-axis'].values[i: i + time_steps]
+        ys = df['y-axis'].values[i: i + time_steps]
+        zs = df['z-axis'].values[i: i + time_steps]
+        # Retrieve the most often used label in this segment
+        label = stats.mode(df[label_name][i: i + time_steps])[0][0]
+        segments.append([xs, ys, zs])
+        labels.append(label)
+
+    # Bring the segments into a better shape
+    reshaped_segments = np.asarray(segments, dtype= np.float32).reshape(-1, time_steps, N_FEATURES)
+    labels = np.asarray(labels)
+
+    return reshaped_segments, labels
+
+# %%
+
+# ------- THE PROGRAM TO LOAD DATA AND TRAIN THE MODEL -------
+
+# Set some standard parameters upfront
+pd.options.display.float_format = '{:.1f}'.format
+sns.set() # Default seaborn look and feel
+plt.style.use('ggplot')
+print('keras version ', keras.__version__)
+
+LABELS = ["Downstairs",
+          "Jogging",
+          "Sitting",
+          "Standing",
+          "Upstairs",
+          "Walking"]
+# The number of steps within one time segment
+TIME_PERIODS = 80
+# The steps to take from one segment to the next; if this value is equal to
+# TIME_PERIODS, then there is no overlap between the segments
+STEP_DISTANCE = 40
+
+# %%
+
+print("\n--- Load, inspect and transform data ---\n")
+
+# Load data set containing all the data from csv
+df = read_data('Data/WISDM_ar_v1.1_raw.txt')
+
+# Describe the data
+show_basic_dataframe_info(df, 20)
+
+df['activity'].value_counts().plot(kind='bar',
+                                   title='Training Examples by Activity Type')
+plt.show()
+
+df['user-id'].value_counts().plot(kind='bar',
+                                  title='Training Examples by User')
+plt.show()
+
+for activity in np.unique(df["activity"]):
+    subset = df[df["activity"] == activity][:180]
+    plot_activity(activity, subset)
+
+# Define column name of the label vector
+LABEL = "ActivityEncoded"
+# Transform the labels from String to Integer via LabelEncoder
+le = preprocessing.LabelEncoder()
+# Add a new column to the existing DataFrame with the encoded values
+df[LABEL] = le.fit_transform(df["activity"].values.ravel())
+
+# %%
+
+print("\n--- Reshape the data into segments ---\n")
+
+# Differentiate between test set and training set
+df_test = df[df['user-id'] > 28]
+df_train = df[df['user-id'] <= 28]
+
+# Normalize features for training data set
+df_train['x-axis'] = feature_normalize(df['x-axis'])
+df_train['y-axis'] = feature_normalize(df['y-axis'])
+df_train['z-axis'] = feature_normalize(df['z-axis'])
+# Round in order to comply to NSNumber from iOS
+df_train = df_train.round({'x-axis': 6, 'y-axis': 6, 'z-axis': 6})
+
+# Reshape the training data into segments
+# so that they can be processed by the network
+x_train, y_train = create_segments_and_labels(df_train,
+                                              TIME_PERIODS,
+                                              STEP_DISTANCE,
+                                              LABEL)
+
+# %%
+
+print("\n--- Reshape data to be accepted by Keras ---\n")
+
+# Inspect x data
+print('x_train shape: ', x_train.shape)
+# Displays (20869, 40, 3)
+print(x_train.shape[0], 'training samples')
+# Displays 20869 train samples
+
+# Inspect y data
+print('y_train shape: ', y_train.shape)
+# Displays (20869,)
+
+# Set input & output dimensions
+num_time_periods, num_sensors = x_train.shape[1], x_train.shape[2]
+num_classes = le.classes_.size
+print(list(le.classes_))
+
+# Set input_shape / reshape for Keras
+# Remark: acceleration data is concatenated in one array in order to feed
+# it properly into coreml later, the preferred matrix of shape [40,3]
+# cannot be read in with the current version of coreml (see also reshape
+# layer as the first layer in the keras model)
+input_shape = (num_time_periods*num_sensors)
+x_train = x_train.reshape(x_train.shape[0], input_shape)
+
+print('x_train shape:', x_train.shape)
+# x_train shape: (20869, 120)
+print('input_shape:', input_shape)
+# input_shape: (120)
+
+# Convert type for Keras otherwise Keras cannot process the data
+x_train = x_train.astype("float32")
+y_train = y_train.astype("float32")
+
+# %%
+
+# One-hot encoding of y_train labels (only execute once!)
+y_train = np_utils.to_categorical(y_train, num_classes)
+print('New y_train shape: ', y_train.shape)
+# (4173, 6)
+
+# %%
+
+print("\n--- Create neural network model ---\n")
+
+# 1D CNN neural network
+model_m = Sequential()
+model_m.add(Reshape((TIME_PERIODS, num_sensors), input_shape=(input_shape,)))
+model_m.add(Conv1D(100, 10, activation='relu', input_shape=(TIME_PERIODS, num_sensors)))
+model_m.add(Conv1D(100, 10, activation='relu'))
+model_m.add(MaxPooling1D(3))
+model_m.add(Conv1D(160, 10, activation='relu'))
+model_m.add(Conv1D(160, 10, activation='relu'))
+model_m.add(GlobalAveragePooling1D())
+model_m.add(Dropout(0.5))
+model_m.add(Dense(num_classes, activation='softmax'))
+print(model_m.summary())
+# Accuracy on training data: 99%
+# Accuracy on test data: 91%
+
+# %%
+
+print("\n--- Fit the model ---\n")
+
+# The EarlyStopping callback monitors training accuracy:
+# if it fails to improve for two consecutive epochs,
+# training stops early
+callbacks_list = [
+    keras.callbacks.ModelCheckpoint(
+        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
+        monitor='val_loss', save_best_only=True),
+    keras.callbacks.EarlyStopping(monitor='acc', patience=1)
+]
+
+model_m.compile(loss='categorical_crossentropy',
+                optimizer='adam', metrics=['accuracy'])
+
+# Hyper-parameters
+BATCH_SIZE = 400
+EPOCHS = 50
+
+# Enable validation to use ModelCheckpoint and EarlyStopping callbacks.
+history = model_m.fit(x_train,
+                      y_train,
+                      batch_size=BATCH_SIZE,
+                      epochs=EPOCHS,
+                      callbacks=callbacks_list,
+                      validation_split=0.2,
+                      verbose=1)
+
+# %%
+
+print("\n--- Learning curve of model training ---\n")
+
+# summarize history for accuracy and loss
+plt.figure(figsize=(6, 4))
+plt.plot(history.history['acc'], "g--", label="Accuracy of training data")
+plt.plot(history.history['val_acc'], "g", label="Accuracy of validation data")
+plt.plot(history.history['loss'], "r--", label="Loss of training data")
+plt.plot(history.history['val_loss'], "r", label="Loss of validation data")
+plt.title('Model Accuracy and Loss')
+plt.ylabel('Accuracy and Loss')
+plt.xlabel('Training Epoch')
+plt.ylim(0)
+plt.legend()
+plt.show()
+
+#%%
+
+print("\n--- Check against test data ---\n")
+
+# Normalize features for training data set
+df_test['x-axis'] = feature_normalize(df_test['x-axis'])
+df_test['y-axis'] = feature_normalize(df_test['y-axis'])
+df_test['z-axis'] = feature_normalize(df_test['z-axis'])
+
+df_test = df_test.round({'x-axis': 6, 'y-axis': 6, 'z-axis': 6})
+
+x_test, y_test = create_segments_and_labels(df_test,
+                                            TIME_PERIODS,
+                                            STEP_DISTANCE,
+                                            LABEL)
+
+# Set input_shape / reshape for Keras
+x_test = x_test.reshape(x_test.shape[0], input_shape)
+
+x_test = x_test.astype("float32")
+y_test = y_test.astype("float32")
+
+y_test = np_utils.to_categorical(y_test, num_classes)
+
+score = model_m.evaluate(x_test, y_test, verbose=1)
+
+print("\nAccuracy on test data: %0.2f" % score[1])
+print("\nLoss on test data: %0.2f" % score[0])
+
+# %%
+
+print("\n--- Confusion matrix for test data ---\n")
+
+y_pred_test = model_m.predict(x_test)
+# Take the class with the highest probability from the test predictions
+max_y_pred_test = np.argmax(y_pred_test, axis=1)
+max_y_test = np.argmax(y_test, axis=1)
+
+show_confusion_matrix(max_y_test, max_y_pred_test)
+
+# %%
+
+print("\n--- Classification report for test data ---\n")
+
+print(classification_report(max_y_test, max_y_pred_test))
\ No newline at end of file