From d8319448696fa4d54c4d5fe0acc0608bc04f0df0 Mon Sep 17 00:00:00 2001
From: "cameron.johnson"
Date: Thu, 19 Oct 2023 18:49:14 -0400
Subject: [PATCH] Ongoing non-HMM step tracking code addition

---
 .gitignore                                    |   1 +
 .../global_step_prediction/__init__.py        |   0
 .../predict_global_step.py                    | 339 ++++++++++++++++++
 .../predict_global_step_randForest.py         | 272 ++++++++++++++
 ...teps_cofig-recipe-coffee-shortstrings.yaml | 210 +++++++++++
 5 files changed, 822 insertions(+)
 create mode 100644 angel_system/global_step_prediction/__init__.py
 create mode 100644 angel_system/global_step_prediction/predict_global_step.py
 create mode 100644 angel_system/global_step_prediction/predict_global_step_randForest.py
 create mode 100644 config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml

diff --git a/.gitignore b/.gitignore
index f80e0d1f1..7aa136cba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 /.container_xauth
 /model_files
 /ros_bags
+/outputs
 
 ### Python template
 # Byte-compiled / optimized / DLL files
diff --git a/angel_system/global_step_prediction/__init__.py b/angel_system/global_step_prediction/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/angel_system/global_step_prediction/predict_global_step.py b/angel_system/global_step_prediction/predict_global_step.py
new file mode 100644
index 000000000..8221204d1
--- /dev/null
+++ b/angel_system/global_step_prediction/predict_global_step.py
@@ -0,0 +1,339 @@
+import yaml
+import pandas as pd
+import seaborn as sns
+import numpy as np
+import kwcoco
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+import scipy.ndimage as ndi
+
+
+def sanitize_str(str_: str):
+    """
+    Convert string to lowercase and remove trailing whitespace and period.
+
+    :param str_: Input text
+
+    :return: ``str_`` converted to lowercase and stripped of trailing
+        whitespace and period.
+    :rtype: str
+    """
+    return str_.lower().strip(" .")
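The docstrings below assume per-frame "activity_conf" and "activity_gt" fields on each kwcoco image. A minimal sketch of those lookups, using the same prediction file this script loads later (torch and numpy are interchangeable here):

```python
import numpy as np
import kwcoco

coco = kwcoco.CocoDataset("model_files/val_activity_preds_epoch40.mscoco.json")

# frames x classes confidence matrix, and per-frame ground-truth activity id
activity_confs = np.asarray(coco.images().lookup("activity_conf"))
activity_gt = np.asarray(coco.images().lookup("activity_gt"))

# Confidence assigned to the true class at each frame
true_conf = activity_confs[np.arange(len(activity_gt)), activity_gt]
print(activity_confs.shape, true_conf.mean())
```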
+
+
+def plot_positive_GT_conf_distributions(activity_confs, activity_gt):
+    """
+    For each activity, plot the distribution of confidences when ground
+    truth indicates that activity is happening.
+
+    i.e.: for activity x, for frames in which ground truth = x, plot
+    the distribution of confidences.
+
+    Inputs:
+        activity_confs: frames x class-wise-confidences. Given a kwcoco
+            dataset called "coco":
+            ```
+            activity_confs = torch.asarray(coco.images().lookup("activity_conf"))
+            ```
+            (49K x 25 for coffee val set.)
+        activity_gt: frames x ground truth activity_id. Given a kwcoco
+            dataset called "coco":
+            ```
+            activity_gt = torch.asarray(coco.images().lookup("activity_gt"))
+            ```
+    """
+    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
+
+    # Get data together
+    true_confs = [float(activity_confs[i, truth_ind])
+                  for i, truth_ind in enumerate(activity_gt)]
+    data = {"true_conf": true_confs, "gt": activity_gt}
+    df = pd.DataFrame(data)
+
+    false_confs = np.array([[a for i, a in enumerate(act_conf) if i != gt]
+                            for act_conf, gt in zip(activity_confs, activity_gt)]).flatten()
+    false_gt = np.array([[gt for i, a in enumerate(act_conf) if i != gt]
+                         for act_conf, gt in zip(activity_confs, activity_gt)]).flatten()
+    data_opposite = {"true_conf": false_confs, "gt": false_gt}
+    df_opposite = pd.DataFrame(data_opposite)
+
+    def plot(df):
+        # Initialize the FacetGrid object
+        pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
+        g = sns.FacetGrid(df, row="gt", hue="gt", aspect=15, height=.5, palette=pal)
+
+        # Draw the densities in a few steps
+        g.map(sns.kdeplot, "true_conf",
+              bw_adjust=.5, clip_on=False,
+              fill=True, alpha=1, linewidth=1.5)
+        g.map(sns.kdeplot, "true_conf", clip_on=False, color="w", lw=2, bw_adjust=.5)
+
+        # passing color=None to refline() uses the hue mapping
+        g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)
+
+        # Define and use a simple function to label the plot in axes coordinates
+        def label(x, color, label):
+            ax = plt.gca()
+            ax.text(0, .2, label, fontweight="bold", color=color,
+                    ha="left", va="center", transform=ax.transAxes)
+        g.map(label, "true_conf")
+
+        # Set the subplots to overlap
+        g.figure.subplots_adjust(hspace=-.25)
+
+        # Remove axes details that don't play well with overlap
+        g.set_titles("")
+        g.set(yticks=[], ylabel="")
+        g.despine(bottom=True, left=True)
+
+    # Draw the true-positive distributions, then save
+    plot(df)
+    plt.savefig("./outputs/plot_positive_GT_conf_distributions.png")
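A smoke test for the plotting helper above, with hypothetical synthetic confidences peaked on the true class (assumes an `./outputs` directory exists):

```python
import numpy as np

rng = np.random.default_rng(0)
gt = rng.integers(0, 5, size=1000)           # 5 synthetic activity classes
confs = rng.uniform(0, 0.4, size=(1000, 5))  # background noise
confs[np.arange(1000), gt] += 0.5            # true class gets higher confidence

plot_positive_GT_conf_distributions(confs, gt)
# -> writes ./outputs/plot_positive_GT_conf_distributions.png
```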
+
+
+def bilateralFtr1D(y, sSpatial=5, sIntensity=1):
+    '''
+    The equation of the bilateral filter is
+
+                (        dx ^ 2       )       (         dI ^ 2         )
+        F = exp (- -------------------) * exp (- ----------------------)
+                (  sigma_spatial ^ 2  )       (  sigma_Intensity ^ 2   )
+
+    This is a Gaussian filter!
+    dx - The 'geometric' distance between the 'center pixel' and the pixel
+         to sample
+    dI - The difference between the intensity of the 'center pixel' and
+         the pixel to sample
+    sigma_spatial and sigma_Intensity are constants. Higher values mean
+    that we 'tolerate more' higher values of the distances dx and dI.
+
+    Dependencies: numpy, scipy.ndimage.gaussian_filter1d
+
+    calc gaussian kernel size as: filterSize = (2 * radius) + 1;
+    radius = floor(2 * sigma_spatial)
+    y - input data
+    '''
+
+    # Gaussian filter and parameters
+    radius = np.floor(2 * sSpatial)
+    filterSize = (2 * radius) + 1
+    ftrArray = np.zeros(int(filterSize))
+    ftrArray[int(radius)] = 1
+
+    # Compute the Gaussian filter part of the Bilateral filter
+    gauss = ndi.gaussian_filter1d(ftrArray, sSpatial)
+
+    # 1d data dimensions
+    width = y.size
+
+    # 1d resulting data
+    ret = np.zeros(width)
+
+    for i in range(width):
+
+        # To prevent accessing values outside of the array:
+        # The left part of the lookup area, clamped to the boundary
+        xmin = max(i - radius, 1)
+        # How many columns were outside the image, on the left?
+        dxmin = xmin - (i - radius)
+
+        # The right part of the lookup area, clamped to the boundary
+        xmax = min(i + radius, width)
+        # How many columns were outside the image, on the right?
+        dxmax = (i + radius) - xmax
+
+        # The actual range of the array we will look at
+        area = y[int(xmin):int(xmax)]
+
+        # The center position
+        center = y[i]
+
+        # The left expression in the bilateral filter equation
+        # We take only the relevant parts of the matrix of the
+        # Gaussian weights - we use dxmin, dxmax, dymin, dymax to
+        # ignore the parts that are outside the image
+        expS = gauss[int(1 + dxmin):int(filterSize - dxmax)]
+
+        # The right expression in the bilateral filter equation
+        dy = y[int(xmin):int(xmax)] - y[i]
+        dIsquare = dy * dy
+        expI = np.exp(-dIsquare / (sIntensity * sIntensity))
+
+        # The bilateral filter (weights matrix)
+        F = expI * expS
+
+        # Normalized bilateral filter
+        Fnormalized = F / sum(F)
+
+        # Multiply the area by the filter
+        tempY = y[int(xmin):int(xmax)] * Fnormalized
+
+        # The resulting pixel is the sum of all the pixels in
+        # the area, according to the weights of the filter
+        ret[i] = sum(tempY)
+
+    return ret
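An illustrative check of what bilateralFtr1D buys over a plain Gaussian blur on this kind of signal: a synthetic noisy step. The bilateral result should smooth within each plateau while keeping the step edge sharper.

```python
import numpy as np
import scipy.ndimage as ndi

rng = np.random.default_rng(0)
y = np.concatenate([np.zeros(100), np.ones(100)]) + rng.normal(0, 0.1, 200)

smoothed_bilateral = bilateralFtr1D(y, sSpatial=5, sIntensity=1)
smoothed_gauss = ndi.gaussian_filter1d(y, sigma=5)

# Jump across the boundary should survive better in the bilateral result.
print(smoothed_bilateral[105] - smoothed_bilateral[95],
      smoothed_gauss[105] - smoothed_gauss[95])
```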
+
+
+def get_average_TP_activations(coco):
+    # For each activity, given the Ground Truth-specified
+    # frame subset where that activity is happening, get the
+    # average activation of that class.
+
+    all_activity_ids = np.unique(np.asarray(coco.images().lookup('activity_gt')))
+    all_vid_ids = np.unique(np.asarray(coco.images().lookup('video_id')))
+
+    avg_probs = np.zeros(max(all_activity_ids) + 1)
+
+    for activity_id in all_activity_ids:
+        image_ids = [img['id'] for img in
+                     coco.videos(video_ids=all_vid_ids).images[0].objs
+                     if img['activity_gt'] == activity_id]
+        sub_dset = coco.subset(gids=image_ids, copy=True)
+        probs_for_true_inds = np.asarray(
+            sub_dset.images().lookup("activity_conf"))[:, activity_id]
+        avg_probs[activity_id] = np.mean(probs_for_true_inds)
+
+    return avg_probs
+
+
+config_fn = "config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml"
+with open(config_fn, "r") as stream:
+    config = yaml.safe_load(stream)
+labels = [sanitize_str(l["description"]) for l in config["steps"]]
+steps = config['steps']
+if steps[0]['id'] == 1:
+    # Prepend an explicit background step; id 0 is reserved for it.
+    config['steps'].insert(0, {'id': 0,
+                               'activity_id': 0,
+                               'description': 'background',
+                               'median_duration_seconds': 0.5,
+                               'mean_conf': 0.5,
+                               'std_conf': 0.2,
+                               })
+
+coco_val = kwcoco.CocoDataset("model_files/val_activity_preds_epoch40.mscoco.json")
+coco_test = kwcoco.CocoDataset("model_files/test_activity_preds.mscoco.json")
+
+image_ids = coco_test.index.vidid_to_gids[3]
+video_dset = coco_test.subset(gids=image_ids, copy=True)
+
+# "Training": for each activity class, see what the average "true positive"
+# activation was.
+avg_probs = get_average_TP_activations(coco_test)
+print(f"average_probs = {avg_probs}")
+
+all_vid_ids = np.unique(np.asarray(coco_val.images().lookup('video_id')))
+
+for vid_id in all_vid_ids:
+    print(f"vid_id {vid_id}")
+
+    image_ids = coco_test.index.vidid_to_gids[vid_id]
+    video_dset = coco_test.subset(gids=image_ids, copy=True)
+
+    # All N activity confs x each video frame
+    activity_confs = video_dset.images().lookup("activity_conf")
+
+    next_step = 1
+    step_predictions = []
+    num_frames_activated = num_skip2_frames_activated = 0
+
+    # Predicted step: advance once the next step's confidence has been above
+    # threshold for threshold_frame_count consecutive frames (8 at first,
+    # 16 after frame 15).
+    threshold_frame_count = 8
+    for i, activity_conf in enumerate(activity_confs):
+
+        # Check if we're done: if so, append last step & continue
+        if next_step == len(steps):
+            step_predictions.append(next_step - 1)
+            continue
+        # Next step
+        next_activity_id = steps[next_step]['activity_id']
+        next_next_activity_id = steps[min(len(steps) - 1, next_step + 1)][
+            'activity_id']
+
+        next_activity_conf = activity_conf[next_activity_id]
+        next_next_activity_conf = activity_conf[next_next_activity_id]
+
+        avg_prob_next_activity = avg_probs[next_activity_id]
+        avg_prob_next_next_activity = avg_probs[next_next_activity_id]
+
+        if i > 15:
+            threshold_frame_count = 16
+
+        if next_activity_conf > 0.8 * avg_prob_next_activity:
+            num_frames_activated += 1
+        else:
+            num_frames_activated = 0
+
+        # Track the step after next too, so a skipped step can be detected.
+        if next_next_activity_conf > 0.8 * avg_prob_next_next_activity:
+            num_skip2_frames_activated += 1
+        else:
+            num_skip2_frames_activated = 0
+
+        if num_frames_activated >= threshold_frame_count:
+            next_step += 1
+            num_frames_activated = 0
+            num_skip2_frames_activated = 0
+        elif num_skip2_frames_activated >= threshold_frame_count:
+            next_step = min(next_step + 2, len(steps))
+            num_frames_activated = 0
+            num_skip2_frames_activated = 0
+            print("hit a skip-step!!")
+
+        step_predictions.append(next_step - 1)
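The advance rule in the loop above reduces to a small state machine. A distilled sketch (hypothetical helper; the two-step "skip" branch is omitted for brevity, and names mirror the loop above):

```python
def predict_steps(activity_confs, step_activity_ids, avg_probs,
                  threshold_frame_count=8, ratio=0.8):
    """Advance a step pointer once the next step's activity confidence has
    exceeded ratio * its average true-positive activation for
    threshold_frame_count consecutive frames."""
    next_step = 1
    streak = 0
    preds = []
    for conf in activity_confs:
        if next_step < len(step_activity_ids):
            act_id = step_activity_ids[next_step]
            streak = streak + 1 if conf[act_id] > ratio * avg_probs[act_id] else 0
            if streak >= threshold_frame_count:
                next_step += 1
                streak = 0
        preds.append(next_step - 1)
    return preds
```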
fig.savefig(f"./outputs/plot_pred_vs_gt_vid{vid_id}.png") + + if False: + plot_positive_GT_conf_distributions(activity_confs, activity_gt) + + + diff --git a/angel_system/global_step_prediction/predict_global_step_randForest.py b/angel_system/global_step_prediction/predict_global_step_randForest.py new file mode 100644 index 000000000..89d7d883a --- /dev/null +++ b/angel_system/global_step_prediction/predict_global_step_randForest.py @@ -0,0 +1,272 @@ +import yaml +import os +import seaborn as sn +import numpy as np +import kwcoco +import matplotlib.pyplot as plt +import sklearn +import sklearn.ensemble +from sklearn.metrics import confusion_matrix +import scipy.ndimage as ndi +import torch + +def sanitize_str(str_: str): + """ + Convert string to lowercase and emove trailing whitespace and period. + + :param str_: Input text + + :return: ``str_`` converted to lowercase and stripped of trailing whitespace and period. + :rtype: str + """ + return str_.lower().strip(" .") + +def bilateralFtr1D(y, sSpatial = 5, sIntensity = 1): + ''' + The equation of the bilateral filter is + + ( dx ^ 2 ) ( dI ^2 ) + F = exp (- ----------------- ) * exp (- ------------------- ) + ( sigma_spatial ^ 2 ) ( sigma_Intensity ^ 2 ) + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + This is a guassian filter! + dx - The 'geometric' distance between the 'center pixel' and the pixel + to sample + dI - The difference between the intensity of the 'center pixel' and + the pixel to sample + sigma_spatial and sigma_Intesity are constants. Higher values mean + that we 'tolerate more' higher value of the distances dx and dI. + + Dependencies: numpy, scipy.ndimage.gaussian_filter1d + + calc gaussian kernel size as: filterSize = (2 * radius) + 1; radius = floor (2 * sigma_spatial) + y - input data + ''' + + # gaussian filter and parameters + radius = np.floor (2 * sSpatial) + filterSize = ((2 * radius) + 1) + ftrArray = np.zeros(int(filterSize)) + ftrArray[int(radius)] = 1 + + # Compute the Gaussian filter part of the Bilateral filter + gauss = ndi.gaussian_filter1d(ftrArray, sSpatial) + + # 1d data dimensions + width = y.size + + # 1d resulting data + ret = np.zeros (width) + + for i in range(width): + + ## To prevent accessing values outside of the array + # The left part of the lookup area, clamped to the boundary + xmin = max(i - radius, 1); + # How many columns were outside the image, on the left? + dxmin = xmin - (i - radius); + + # The right part of the lookup area, clamped to the boundary + xmax = min(i + radius, width); + # How many columns were outside the image, on the right? 
diff --git a/angel_system/global_step_prediction/predict_global_step_randForest.py b/angel_system/global_step_prediction/predict_global_step_randForest.py
new file mode 100644
index 000000000..89d7d883a
--- /dev/null
+++ b/angel_system/global_step_prediction/predict_global_step_randForest.py
@@ -0,0 +1,272 @@
+import yaml
+import seaborn as sns
+import numpy as np
+import kwcoco
+import matplotlib.pyplot as plt
+import sklearn
+import sklearn.ensemble
+from sklearn.metrics import confusion_matrix
+import scipy.ndimage as ndi
+import torch
+
+
+def sanitize_str(str_: str):
+    """
+    Convert string to lowercase and remove trailing whitespace and period.
+
+    :param str_: Input text
+
+    :return: ``str_`` converted to lowercase and stripped of trailing
+        whitespace and period.
+    :rtype: str
+    """
+    return str_.lower().strip(" .")
+
+
+def bilateralFtr1D(y, sSpatial=5, sIntensity=1):
+    '''
+    The equation of the bilateral filter is
+
+                (        dx ^ 2       )       (         dI ^ 2         )
+        F = exp (- -------------------) * exp (- ----------------------)
+                (  sigma_spatial ^ 2  )       (  sigma_Intensity ^ 2   )
+
+    This is a Gaussian filter!
+    dx - The 'geometric' distance between the 'center pixel' and the pixel
+         to sample
+    dI - The difference between the intensity of the 'center pixel' and
+         the pixel to sample
+    sigma_spatial and sigma_Intensity are constants. Higher values mean
+    that we 'tolerate more' higher values of the distances dx and dI.
+
+    Dependencies: numpy, scipy.ndimage.gaussian_filter1d
+
+    calc gaussian kernel size as: filterSize = (2 * radius) + 1;
+    radius = floor(2 * sigma_spatial)
+    y - input data
+    '''
+
+    # Gaussian filter and parameters
+    radius = np.floor(2 * sSpatial)
+    filterSize = (2 * radius) + 1
+    ftrArray = np.zeros(int(filterSize))
+    ftrArray[int(radius)] = 1
+
+    # Compute the Gaussian filter part of the Bilateral filter
+    gauss = ndi.gaussian_filter1d(ftrArray, sSpatial)
+
+    # 1d data dimensions
+    width = y.size
+
+    # 1d resulting data
+    ret = np.zeros(width)
+
+    for i in range(width):
+
+        # To prevent accessing values outside of the array:
+        # The left part of the lookup area, clamped to the boundary
+        xmin = max(i - radius, 1)
+        # How many columns were outside the image, on the left?
+        dxmin = xmin - (i - radius)
+
+        # The right part of the lookup area, clamped to the boundary
+        xmax = min(i + radius, width)
+        # How many columns were outside the image, on the right?
+        dxmax = (i + radius) - xmax
+
+        # The actual range of the array we will look at
+        area = y[int(xmin):int(xmax)]
+
+        # The center position
+        center = y[i]
+
+        # The left expression in the bilateral filter equation
+        # We take only the relevant parts of the matrix of the
+        # Gaussian weights - we use dxmin, dxmax, dymin, dymax to
+        # ignore the parts that are outside the image
+        expS = gauss[int(1 + dxmin):int(filterSize - dxmax)]
+
+        # The right expression in the bilateral filter equation
+        dy = y[int(xmin):int(xmax)] - y[i]
+        dIsquare = dy * dy
+        expI = np.exp(-dIsquare / (sIntensity * sIntensity))
+
+        # The bilateral filter (weights matrix)
+        F = expI * expS
+
+        # Normalized bilateral filter
+        Fnormalized = F / sum(F)
+
+        # Multiply the area by the filter
+        tempY = y[int(xmin):int(xmax)] * Fnormalized
+
+        # The resulting pixel is the sum of all the pixels in
+        # the area, according to the weights of the filter
+        ret[i] = sum(tempY)
+
+    return ret
+
+
+def get_average_TP_activations(coco, clf):
+    # For each activity, given the Ground Truth-specified
+    # frame subset where that activity is happening, get the
+    # average activation of that class.
+
+    all_activity_ids = np.unique(np.asarray(coco.images().lookup('activity_gt')))
+    all_vid_ids = np.unique(np.asarray(coco.images().lookup('video_id')))
+
+    activity_confs = torch.asarray(coco.images().lookup("activity_conf"))
+    new_probs = clf.predict_proba(activity_confs)
+    # Activity id 17 does not occur in this recipe (see the task config), so
+    # the classifier has no column for it; re-insert a zero column so column
+    # indices line up with activity ids again.
+    new_probs_all_classes = np.zeros((new_probs.shape[0], new_probs.shape[1] + 1))
+    new_probs_all_classes[:, 0:17] = new_probs[:, 0:17]
+    new_probs_all_classes[:, 18:] = new_probs[:, 17:]
+
+    avg_probs = np.zeros(max(all_activity_ids) + 1)
+
+    for activity_id in all_activity_ids:
+        image_ids = [img['id'] for img in
+                     coco.videos(video_ids=all_vid_ids).images[0].objs
+                     if img['activity_gt'] == activity_id]
+        probs_for_true_inds = np.asarray(new_probs_all_classes)[image_ids][:, activity_id]
+        avg_probs[activity_id] = np.mean(probs_for_true_inds)
+
+    return avg_probs
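The hardcoded 17/18 splice above can be generalized. A sketch assuming only that `clf` is a fitted scikit-learn classifier: `predict_proba` columns follow `clf.classes_`, so any missing activity ids can be re-inserted programmatically:

```python
import numpy as np

def probs_aligned_to_ids(clf, X, n_activity_ids):
    probs = clf.predict_proba(X)  # columns ordered by clf.classes_
    aligned = np.zeros((probs.shape[0], n_activity_ids))
    for col, class_id in enumerate(clf.classes_):
        aligned[:, int(class_id)] = probs[:, col]
    return aligned  # classes absent from training keep probability 0
```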
+
+
+def train_random_forest(coco):
+    activity_confs = torch.asarray(coco.images().lookup("activity_conf"))
+    activity_preds = torch.asarray(coco.images().lookup("activity_pred"))
+    activity_gt = torch.asarray(coco.images().lookup("activity_gt"))
+    n_classes = len(activity_confs[0])
+    clf = sklearn.ensemble.RandomForestClassifier(
+        n_estimators=100, max_depth=2, random_state=0)  # , class_weight="balanced")
+    # training
+    clf.fit(activity_confs, activity_gt)
+
+    # Sanity check: print out training dataset performance
+    y_hat = clf.predict(activity_confs)
+
+    TP = np.sum(activity_gt.numpy() == y_hat)
+    n = y_hat.shape[0]
+    print(f'{TP}/{n} Train RF Accuracy {100*TP/n:0.2f}%')
+
+    TP = np.sum(activity_gt.numpy() == activity_preds.numpy())
+    n = y_hat.shape[0]
+    print(f'{TP}/{n} TCN Accuracy {100*TP/n:0.2f}%')
+
+    return clf
+
+
+config_fn = "config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml"
+with open(config_fn, "r") as stream:
+    config = yaml.safe_load(stream)
+labels = [sanitize_str(l["description"]) for l in config["steps"]]
+steps = config['steps']
+if steps[0]['id'] == 1:
+    # Prepend an explicit background step; id 0 is reserved for it.
+    config['steps'].insert(0, {'id': 0,
+                               'activity_id': 0,
+                               'description': 'background',
+                               'median_duration_seconds': 0.5,
+                               'mean_conf': 0.5,
+                               'std_conf': 0.2,
+                               })
+
+coco_val = kwcoco.CocoDataset("model_files/val_activity_preds_epoch40.mscoco.json")
+coco_test = kwcoco.CocoDataset("model_files/test_activity_preds.mscoco.json")
+
+image_ids = coco_test.index.vidid_to_gids[3]
+video_dset = coco_test.subset(gids=image_ids, copy=True)
+
+# "Training": for each activity class, see what the average "true positive"
+# activation was.
+clf = train_random_forest(coco_test)
+avg_probs = get_average_TP_activations(coco_test, clf)
+print(f"average_probs = {avg_probs}")
+
+all_vid_ids = np.unique(np.asarray(coco_val.images().lookup('video_id')))
+
+for vid_id in all_vid_ids:
+    print(f"vid_id {vid_id}")
+
+    image_ids = coco_test.index.vidid_to_gids[vid_id]
+    video_dset = coco_test.subset(gids=image_ids, copy=True)
+
+    # All N activity confs x each video frame
+    activity_confs = video_dset.images().lookup("activity_conf")
+    new_probs = clf.predict_proba(activity_confs)
+    # Re-insert the zero column for the unused activity id 17 (see above).
+    new_probs_all_classes = np.zeros((new_probs.shape[0], new_probs.shape[1] + 1))
+    new_probs_all_classes[:, 0:17] = new_probs[:, 0:17]
+    new_probs_all_classes[:, 18:] = new_probs[:, 17:]
+
+    next_step = 1
+    step_predictions = []
+    num_frames_activated = 0
+
+    # Predicted step: advance once confidence has been above threshold for
+    # 8 consecutive frames.
+    for activity_conf in new_probs_all_classes:
+        # Next step
+        next_activity_id = steps[next_step]['activity_id']
+
+        next_activity_conf = activity_conf[next_activity_id]
+
+        avg_prob_next_activity = avg_probs[next_activity_id]
+
+        if next_activity_conf > 0.8 * avg_prob_next_activity:
+            num_frames_activated += 1
+        else:
+            num_frames_activated = 0
+
+        if num_frames_activated >= 8:
+            if next_step < 23:  # cap at the final step id
+                next_step += 1
+            num_frames_activated = 0
+
+        step_predictions.append(next_step - 1)
+
+    # Ground truth step:
+    activity_gts = video_dset.images().lookup("activity_gt")
+    step_gts = []
+    step_gts_no_background = []
+    current_step = 0
+    for activity_gt in activity_gts:
+        # convert activity id to step id
+        step_id = next(int(item['id']) for item in steps
+                       if item['activity_id'] == activity_gt)
+        step_gts.append(step_id)
+
+        # A version of GT that never jumps back to 0
+        if step_id > 0:
+            current_step = step_id
+        step_gts_no_background.append(current_step)
+
+    # Plot confusion matrix
+    fig, ax = plt.subplots(figsize=(100, 100))
+    cm = confusion_matrix(step_gts_no_background, step_predictions)
+    sns.heatmap(cm, annot=True, fmt="g", ax=ax)
+    sns.set(font_scale=4)
+    ax.set(
+        title="Confusion Matrix",
+        xlabel="Predicted Label",
+        ylabel="True Label",)
+    fig.savefig(f"./outputs/plot_confusion_mat_vid{vid_id}.png")
+
+    # Plot gt vs predicted class across all vid frames
+    fig = plt.figure()
+    sns.set(font_scale=1)
+    step_gts = [float(i) for i in step_gts]
+    plt.plot(step_gts, label='gt')
+    plt.plot(step_predictions, label='estimated')
+    plt.plot(10 * np.asarray(activity_confs)[:, 17] - 5, label='act_preds[17]')
+    plt.plot(10 * np.asarray(activity_confs)[:, 18] - 5, label='act_preds[18]')
+    plt.plot(10 * np.asarray(activity_confs)[:, 19] - 5, label='act_preds[19]')
+
+    plt.plot(bilateralFtr1D(10 * np.asarray(activity_confs)[:, 17]) - 10,
+             label='act_preds_bilateral[17]')
+    plt.plot(bilateralFtr1D(10 * np.asarray(activity_confs)[:, 18]) - 10,
+             label='act_preds_bilateral[18]')
+    plt.plot(bilateralFtr1D(10 * np.asarray(activity_confs)[:, 19]) - 10,
+             label='act_preds_bilateral[19]')
+    plt.legend()
+    fig.savefig(f"./outputs/plot_pred_vs_gt_vid{vid_id}.png")
diff --git a/config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml b/config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml
new file mode 100644
index 000000000..1ac538b34
--- /dev/null
+++ b/config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml
@@ -0,0 +1,210 @@
+# Schema version.
+version: "1.0"
+
+# Reference to the activity classification labels configuration that we will
+# reference into.
+activity_labels: "./config/activity_labels/recipe_coffee.yaml"
+
+# Reference to the file defining the mean and standard deviation of the
+# activity classifications to be used by the HMM. For N activities, both the
+# mean and standard deviation should be N x N matrices such that when activity
+# i is actually occurring, the classifier will emit confidence
+# mean[i, j] +/- std[i, j] for activity j.
+activity_mean_and_std_file: "./model_files/recipe_coffee_shortstrings_mean_std.npy"
+
+# Task title for display purposes.
+title: "Pour-over coffee"
+
+# Layout of the steps that define this task.
+steps:
+  # Item format:
+  # - id: Identifying integer for the step.
+  # - activity_id: The ID of an activity classification associated with this
+  #   step. This must reference an ID within the `activity_labels`
+  #   configuration file referenced above.
+  # - description: Human semantic description of this step.
+  # - median_duration_seconds: Median expected time this task will
+  #   consume in seconds.
+  # - mean_conf: mean value of classifier confidence for true examples.
+  # - std_conf: standard deviation of confidence for both true and false
+  #   examples.
+  - id: 1   # Must start at 1, 0 is reserved for background.
+    activity_id: 1
+    description: >-
+      measure-12oz-water
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 2
+    activity_id: 2
+    description: >-
+      pour-water-kettle
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 3
+    activity_id: 24
+    description: >-
+      turn-on-kettle
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 4
+    activity_id: 3
+    description: >-
+      place-dipper-on-mug
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 5
+    activity_id: 4
+    description: >-
+      filter-fold-half
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 6
+    activity_id: 5
+    description: >-
+      filter-fold-quarter
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 7
+    activity_id: 6
+    description: >-
+      place-filter
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 8
+    activity_id: 7
+    description: >-
+      spread-filter
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 9
+    activity_id: 8
+    description: >-
+      scale-turn-on
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 10
+    activity_id: 9
+    description: >-
+      place-bowl-on-scale
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 11
+    activity_id: 10
+    description: >-
+      zero-scale
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 12
+    activity_id: 11
+    description: >-
+      measure-coffee-beans
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 13
+    activity_id: 12
+    description: >-
+      pour-coffee-grinder
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 14
+    activity_id: 13
+    description: >-
+      grind-beans
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 15
+    activity_id: 14
+    description: >-
+      pour-beans-filter
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 16
+    activity_id: 15
+    description: >-
+      thermometer-turn-on
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 17
+    activity_id: 16
+    description: >-
+      thermometer-in-water
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 18
+    activity_id: 18  # activity_id 17 is unused by this recipe
+    description: >-
+      pour-water-grounds-wet
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 19
+    activity_id: 19
+    description: >-
+      pour-water-grounds-circular
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 20
+    activity_id: 20
+    description: >-
+      water-drain
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 21
+    activity_id: 21
+    description: >-
+      remove-dripper
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 22
+    activity_id: 22
+    description: >-
+      remove-grounds
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+  - id: 23
+    activity_id: 23
+    description: >-
+      discard-grounds
+    median_duration_seconds: 5
+    mean_conf: 0.5
+    std_conf: 0.2
+
+# Hidden Markov model configuration parameters.
+hmm:
+  # Time (seconds) between time steps of HMM. Sets the temporal precision of
+  # the HMM analysis at the expense of processing costs.
+  dt: 0.5
+
+  # Constrain whether HMM sequence can skip steps or jump backwards. When both
+  # values are set to 0, forward progress without skipping steps is enforced.
+  num_steps_can_jump_fwd: 1
+  num_steps_can_jump_bck: 0
+
+  # Default classifier mean confidence to use if not explicitly provided for a
+  # step.
+  default_mean_conf: 0.5
+
+  # Default classifier standard deviation of confidence to use if not
+  # explicitly provided for a step.
+  default_std_conf: 0.2
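A sanity check one might run against this config (hypothetical helper; note that activity_id 17 is deliberately absent from the recipe, which is what the zero-column splice in predict_global_step_randForest.py compensates for):

```python
import yaml

with open("config/tasks/task_steps_cofig-recipe-coffee-shortstrings.yaml") as f:
    cfg = yaml.safe_load(f)

# Step ids must be contiguous from 1 (0 is injected at runtime as background).
ids = [s["id"] for s in cfg["steps"]]
assert ids == list(range(1, len(ids) + 1)), "step ids must be 1..N with no gaps"

# Each step must map to a distinct activity id.
activity_ids = [s["activity_id"] for s in cfg["steps"]]
assert len(set(activity_ids)) == len(activity_ids), "duplicate activity_id"
print(f"{len(ids)} steps OK; activity ids used: {sorted(activity_ids)}")
```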