diff --git a/materials/R/worksheet_regression1/.ipynb_checkpoints/worksheet_regression1-checkpoint.ipynb b/materials/R/worksheet_regression1/.ipynb_checkpoints/worksheet_regression1-checkpoint.ipynb new file mode 100644 index 0000000..56a9d95 --- /dev/null +++ b/materials/R/worksheet_regression1/.ipynb_checkpoints/worksheet_regression1-checkpoint.ipynb @@ -0,0 +1,1676 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d4829b728b7113c57a089bba81d0b9ab", + "grade": false, + "grade_id": "cell-6093d584be2ad9ed", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "# Worksheet: Regression I: K-nearest neighbors\n", + "\n", + "This worksheet covers the [Regression I: K-nearest neighbors](https://datasciencebook.ca/regression1.html) chapter of the online textbook, which also lists the learning objectives for this worksheet. You should read the textbook chapter before attempting this worksheet. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "68e6cf3045316eb49bd79a27a9b60f9c", + "grade": false, + "grade_id": "cell-53085d5554f9dcc8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "### Run this cell before continuing.\n", + "library(tidyverse)\n", + "library(repr)\n", + "library(tidymodels)\n", + "options(repr.matrix.max.rows = 6)\n", + "source('cleanup.R')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "004a90536551b93c7af8a8b5d3e92957", + "grade": false, + "grade_id": "cell-8e40e06455324a19", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 0.0** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "To predict a value of $Y$ for a new observation using $k$-nn **regression**, we identify the $k$-nearest neighbours and then:\n", + "\n", + "A. Assign it the median of the $k$-nearest neighbours as the predicted value\n", + "\n", + "B. Assign it the mean of the $k$-nearest neighbours as the predicted value\n", + "\n", + "C. Assign it the mode of the $k$-nearest neighbours as the predicted value\n", + "\n", + "D. Assign it the majority vote of the $k$-nearest neighbours as the predicted value\n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.0`. Make sure your answer is an uppercase letter and is surrounded by quotation marks (e.g. `\"F\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b2a94d99e15ed916391daed3643fe923", + "grade": false, + "grade_id": "cell-108236d99368cc44", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b73094b84d34433c99720e08ae57ca22", + "grade": true, + "grade_id": "cell-7d5938e33a38edeb", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.0 is not character\"= setequal(digest(paste(toString(class(answer0.0)), \"687d1\")), \"b0606851f8ab1403affe70dae88277a1\"))\n", + "stopifnot(\"length of answer0.0 is not correct\"= setequal(digest(paste(toString(length(answer0.0)), \"687d1\")), \"8be1aa3b66de219b03427ea3a762d5f1\"))\n", + "stopifnot(\"value of answer0.0 is not correct\"= setequal(digest(paste(toString(tolower(answer0.0)), 
\"687d1\")), \"29c90bbc32809ae085b6d948b602ab03\"))\n", + "stopifnot(\"letters in string value of answer0.0 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.0), \"687d1\")), \"7ef26a441a8f13b24e927d8758ee9942\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "95e3437a546e5c50fc1f72c94db602de", + "grade": false, + "grade_id": "cell-73d7913554df0781", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### RMSPE Definition\n", + "\n", + "**Question 0.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "What does RMSPE stand for?\n", + "\n", + "\n", + "A. root mean squared prediction error\n", + "\n", + "B. root mean squared percentage error \n", + "\n", + "C. root mean squared performance error \n", + "\n", + "D. root mean squared preference error \n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.1`. Make sure you put quotations around the letter and pay attention to case.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f164912efe33cd5ed2060a318346e2a6", + "grade": false, + "grade_id": "cell-81be027da315cb3f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b06b6e1dee0dd6c9df975345e10496e8", + "grade": true, + "grade_id": "cell-db93265fd197f97f", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.1 is not character\"= setequal(digest(paste(toString(class(answer0.1)), \"8c44c\")), \"2afe90074fad3ec3878a60a49c4f380e\"))\n", + "stopifnot(\"length of answer0.1 is not correct\"= setequal(digest(paste(toString(length(answer0.1)), \"8c44c\")), \"5ef1b720a52eb9027ba23705d33bbf41\"))\n", + "stopifnot(\"value of answer0.1 is not correct\"= setequal(digest(paste(toString(tolower(answer0.1)), \"8c44c\")), \"85f9cc5e75fbe01d797bb60a41779cbd\"))\n", + "stopifnot(\"letters in string value of answer0.1 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.1), \"8c44c\")), 
\"ab7de5459f5be145c6006d1c73ee311e\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a5e9f226b02857ae7e1850945c63fd68", + "grade": false, + "grade_id": "cell-b0f4fa2237ef0429", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 0.2** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Of those shown below, which is the correct formula for RMSPE?\n", + "\n", + "\n", + "A. $RMSPE = \\sqrt{\\frac{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y_i})^2}{1 - n}}$\n", + "\n", + "B. $RMSPE = \\sqrt{\\frac{1}{n - 1}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y_i})^2}$\n", + "\n", + "C. $RMSPE = \\sqrt{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y_i})^2}$\n", + "\n", + "D. $RMSPE = \\sqrt{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y_i})}$ \n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.2`. Make sure you put quotations around the letter and pay attention to case.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0517b55812bcad016fd4e921018fc075", + "grade": false, + "grade_id": "cell-93c565d6974de38f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c27234f45a3a9cccef78e692f8cafe3d", + "grade": true, + "grade_id": "cell-b1e66491132c1e30", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.2 is not character\"= setequal(digest(paste(toString(class(answer0.2)), \"9bea8\")), \"3fdc4aab2072a9cfa24a7f38521b6a97\"))\n", + "stopifnot(\"length of answer0.2 is not correct\"= setequal(digest(paste(toString(length(answer0.2)), \"9bea8\")), \"c13695fa7859703bd79c245a30e9062b\"))\n", + "stopifnot(\"value of answer0.2 is not correct\"= setequal(digest(paste(toString(tolower(answer0.2)), \"9bea8\")), 
\"0404480a667292e5fc0a7fa025a8ec1b\"))\n", + "stopifnot(\"letters in string value of answer0.2 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.2), \"9bea8\")), \"53ddfcba7833c00fc9fcc981a95a8ee2\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "654988da1cfd73473caa7dccd2e3bbc3", + "grade": false, + "grade_id": "cell-ded5f8fa4e621b81", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 0.3**\n", + "
{points: 1}\n", + "\n", + "The plot below is a very simple k-nn regression example, where the black dots are the data observations and the blue line is the predictions from a $k$-nn regression model created from this data where $k=2$.\n", + "\n", + "Using the formula for RMSE (given in the reading), and the graph below, by hand (pen and paper or use R as a calculator) calculate RMSE for this model. Use **one decimal place of precision** when inputting the heights of the black dots and blue line. Save your answer to a variable named `answer0.3` \n", + "\n", + "Notes:\n", + " - RMSE (Root Mean Squared Error) evaluates model performance on **training data**. \n", + " - RMSPE (Root Mean Squared Prediction Error) measures performance on **testing or validation data**\n", + " - The predicted value when x = 1 is 1.3 (it's a bit hard to tell from the figure!) \n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7fbf2d7959474537e5e864b1349839b1", + "grade": false, + "grade_id": "cell-8dc7cc6f35de80cb", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d7d52ee25b3e5201424ee91dfee3295f", + "grade": true, + "grade_id": "cell-d90b3e7b890ce86b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer0.3, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer0.3, 2))), \"2645c\")), \"890f753574357a14c6dc46b6fde1d4a1\"))\n", + "stopifnot(\"value of round(answer0.3, 2) is not correct (rounded to 2 decimal 
places)\"= setequal(digest(paste(toString(round(round(answer0.3, 2), 2)), \"2645c\")), \"584909b564382325b5e2e51f497a8f06\"))\n", + "stopifnot(\"length of round(answer0.3, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer0.3, 2))), \"2645c\")), \"654f47c4e06ab41e08e86365cf3674de\"))\n", + "stopifnot(\"values of round(answer0.3, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer0.3, 2), 2))), \"2645c\")), \"584909b564382325b5e2e51f497a8f06\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "c94e4bc2ed7206d48d7655fae0781008", + "grade": false, + "grade_id": "cell-e612110169987c7b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## Marathon Training\n", + "\n", + "\n", + "\n", + "Source: https://media.giphy.com/media/nUN6InE2CodRm/giphy.gif\n", + "\n", + "What predicts which athletes will perform better than others? Specifically, we are interested in marathon runners, and in how the maximum distance run per week (in miles) during race training predicts the time it takes a runner to finish the race. For this, we will be looking at the `marathon.csv` file in the `data/` folder." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9ea30c1434a67e3876226cd543d43d4c", + "grade": false, + "grade_id": "cell-16d7d432c49c9e53", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 1.0** \n", + "
{points: 1}\n", + "\n", + "Load the data and assign it to an object called `marathon`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e808fb877b552ca26971fdb18432c347", + "grade": false, + "grade_id": "cell-5376dc2c39983258", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f158322eb711309083b0018fbff50ea4", + "grade": true, + "grade_id": "cell-60554f6f6a56a16c", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon)), \"5ad7f\")), \"5dc9c7456ffe76eefefacaf68de81da5\"))\n", + "stopifnot(\"dimensions of marathon are not correct\"= setequal(digest(paste(toString(dim(marathon)), \"5ad7f\")), \"6d38bc2129d410a7271441253593089b\"))\n", + "stopifnot(\"column names of marathon are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon))), \"5ad7f\")), \"caee9f419eb341232e7caf99aae02dbb\"))\n", + "stopifnot(\"types of columns in marathon are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon, class)))), \"5ad7f\")), \"7f418de7bf624da22d05cf8350c89fb9\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.numeric))) sort(round(sapply(marathon[, sapply(marathon, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"5ad7f\")), \"7bde6e3e524402788cd80a3dbcaefc36\"))\n", + "stopifnot(\"values in one or more character columns in 
marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.character))) sum(sapply(marathon[sapply(marathon, is.character)], function(x) length(unique(x)))) else 0), \"5ad7f\")), \"11e84deb8b0074672fa0750b8d32c822\"))\n", + "stopifnot(\"values in one or more factor columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.factor))) sum(sapply(marathon[, sapply(marathon, is.factor)], function(col) length(unique(col)))) else 0), \"5ad7f\")), \"11e84deb8b0074672fa0750b8d32c822\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "19151cf672ee9d30f6c913c2c409d37d", + "grade": false, + "grade_id": "cell-26802c14f46a4d8e", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 2.0**\n", + "
{points: 1}\n", + "\n", + "We want to predict race time (in hours) (`time_hrs`) given a particular value of maximum distance run per week (in miles) during race training (`max`). Let's take a subset of 50 individuals from our marathon data and assign it to an object called `marathon_50`. With this subset, plot a scatterplot to assess the relationship between these two variables. Put `time_hrs` on the y-axis and `max` on the x-axis. **Assign this plot to an object called `answer2`.** Discuss, with a classmate, the relationship between race time and maximum distance run per week during training based on the scatterplot you create below. \n", + "\n", + "*Hint: To take a subset of your data you can use the `slice_sample()` function*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fae00bce988bfcb06ccbe4b969adc789", + "grade": false, + "grade_id": "cell-6fcfc6ca32883f4d", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "set.seed(2000) ### DO NOT CHANGE\n", + "\n", + "#... <- ... 
|>\n", + "# slice_sample(n = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "56be3f23b8abedaf034cf4bde6251ad2", + "grade": true, + "grade_id": "cell-78ac50fb41cd9a86", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_50 should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_50)), \"9e3b0\")), \"f648a715ec3a68a64053d2106e756ee9\"))\n", + "stopifnot(\"dimensions of marathon_50 are not correct\"= setequal(digest(paste(toString(dim(marathon_50)), \"9e3b0\")), \"d5867fbcf32b27e63a31d963e2778026\"))\n", + "stopifnot(\"column names of marathon_50 are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_50))), \"9e3b0\")), \"8cbb07f1cf13637e1d3f1c8128fe5261\"))\n", + "stopifnot(\"types of columns in marathon_50 are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_50, class)))), \"9e3b0\")), \"8805ddc99e3eba80b2db048da1b00656\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_50 are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_50, is.numeric))) sort(round(sapply(marathon_50[, sapply(marathon_50, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e3b0\")), \"8643938e5e8f4fd61198304546a1dcc6\"))\n", + "stopifnot(\"values in one or more character columns in marathon_50 are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_50, is.character))) sum(sapply(marathon_50[sapply(marathon_50, is.character)], function(x) length(unique(x)))) else 0), \"9e3b0\")), \"50aa503f2ae3e0cb19f0e21034a1e821\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_50 are not correct\"= 
setequal(digest(paste(toString(if (any(sapply(marathon_50, is.factor))) sum(sapply(marathon_50[, sapply(marathon_50, is.factor)], function(col) length(unique(col)))) else 0), \"9e3b0\")), \"50aa503f2ae3e0cb19f0e21034a1e821\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(answer2$layers)), function(i) {c(class(answer2$layers[[i]]$geom))[1]})), \"9e3b1\")), \"fe9a3330961d28c5ecdb6766ba3592d8\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(answer2$layers)), function(i) {rlang::get_expr(c(answer2$layers[[i]]$mapping, answer2$mapping)$x)}), as.character))), \"9e3b1\")), \"697608c06e95946e39213ecc7ba8022b\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(answer2$layers)), function(i) {rlang::get_expr(c(answer2$layers[[i]]$mapping, answer2$mapping)$y)}), as.character))), \"9e3b1\")), \"39a0150a1a3ee6b63305919920a11d21\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$x)!= answer2$labels$x), \"9e3b1\")), \"882d81a5500cdab7c2502ead198340c1\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$y)!= answer2$labels$y), \"9e3b1\")), \"882d81a5500cdab7c2502ead198340c1\"))\n", + "stopifnot(\"incorrect colour variable in answer2, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$colour)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"incorrect shape variable in answer2, specify a correct one if required\"= 
setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$shape)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"the colour label in answer2 is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$colour) != answer2$labels$colour), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"the shape label in answer2 is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$colour) != answer2$labels$shape), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"fill variable in answer2 is not correct\"= setequal(digest(paste(toString(quo_name(answer2$mapping$fill)), \"9e3b1\")), \"2697b9782ee1dcc08fd59f501a0d2b1b\"))\n", + "stopifnot(\"fill label in answer2 is not informative\"= setequal(digest(paste(toString((quo_name(answer2$mapping$fill) != answer2$labels$fill)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"position argument in answer2 is not correct\"= setequal(digest(paste(toString(class(answer2$layers[[1]]$position)[1]), \"9e3b1\")), \"62aeaa372676e7b727922d989fbb01ef\"))\n", + "\n", + "stopifnot(\"answer2$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(answer2$data)), \"9e3b2\")), \"d56807b5efb6d39a647f5f9f5d214e64\"))\n", + "stopifnot(\"dimensions of answer2$data are not correct\"= setequal(digest(paste(toString(dim(answer2$data)), \"9e3b2\")), \"9c0858ff2b06e5cd35f0cd2479aed7b8\"))\n", + "stopifnot(\"column names of answer2$data are not correct\"= setequal(digest(paste(toString(sort(colnames(answer2$data))), \"9e3b2\")), \"dd976f74510f1315e62f5f989f301774\"))\n", + "stopifnot(\"types of columns in answer2$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(answer2$data, class)))), \"9e3b2\")), 
\"ade7b8d4b09aaa303ad5f3f0221afa86\"))\n", + "stopifnot(\"values in one or more numerical columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.numeric))) sort(round(sapply(answer2$data[, sapply(answer2$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e3b2\")), \"60f21159c4706fd7d4b459d51a844e99\"))\n", + "stopifnot(\"values in one or more character columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.character))) sum(sapply(answer2$data[sapply(answer2$data, is.character)], function(x) length(unique(x)))) else 0), \"9e3b2\")), \"7850fd9f1acc1d6d1335ab1e14118c5b\"))\n", + "stopifnot(\"values in one or more factor columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.factor))) sum(sapply(answer2$data[, sapply(answer2$data, is.factor)], function(col) length(unique(col)))) else 0), \"9e3b2\")), \"7850fd9f1acc1d6d1335ab1e14118c5b\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "34f89317595dbfcb55227675d68dc3a2", + "grade": false, + "grade_id": "cell-cbe9f9a5c4b7c264", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 3.0**\n", + "
{points: 1}\n", + "\n", + "Suppose we want to predict the race time for someone who ran a maximum distance of 100 miles per week during training. In the plot below we can see that no one has run a maximum distance of 100 miles per week. But, if we are interested in prediction, how can we predict with this data? We can use $k$-nn regression! To do this we get the $Y$ values (target/response variable) of the $k$ nearest observations, take their average, and use that as the prediction. \n", + "\n", + "For this question, predict the race time for a runner whose maximum training distance is 100 miles per week, based on the 4 closest neighbors.\n", + "\n", + "*Fill in the scaffolding below and assign your answer to an object named `answer3`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "75efaaf8fe04ae9b8a48d555d95b030b", + "grade": false, + "grade_id": "cell-51d4b16bee915d31", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# run this cell to see a visualization of the 4 nearest neighbours\n", + "options(repr.plot.height = 6, repr.plot.width = 7)\n", + "marathon_50 |>\n", + " ggplot(aes(x = max, y = time_hrs)) + \n", + " geom_point(color = 'dodgerblue', alpha = 0.4) +\n", + " geom_vline(xintercept = 100, linetype = \"dotted\") +\n", + " xlab(\"Maximum Distance Ran per \\n Week During Training (mi)\") +\n", + " ylab(\"Race Time (hours)\") + \n", + " geom_segment(aes(x = 100, y = 2.56, xend = 107, yend = 2.56), col = \"orange\") +\n", + " geom_segment(aes(x = 100, y = 2.65, xend = 90, yend = 2.65), col = \"orange\") +\n", + " geom_segment(aes(x = 100, y = 2.99, xend = 86, yend = 2.99), col = \"orange\") +\n", + " geom_segment(aes(x = 100, y = 3.05, xend = 82, yend = 3.05), col = \"orange\") +\n", + " theme(text = element_text(size = 20))" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4babff4645a19226f94b42c479b7645c", + "grade": false, + "grade_id": "cell-a8d1b15c312d9c3d", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "#... <- ... |> \n", + "# mutate(diff = abs(100 - ...)) |> \n", + "# ...(diff, ...) |> \n", + "# summarise(predicted = ...(...)) |>\n", + "# pull()\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "40efca56d6817e8226aadabbff223a30", + "grade": true, + "grade_id": "cell-af7afd4794001c7b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer3, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer3, 2))), \"6a21c\")), \"c9d2cea1a4dac1d859d0730c044d41eb\"))\n", + "stopifnot(\"value of round(answer3, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(answer3, 2), 2)), \"6a21c\")), \"08e6448336d1a3eee509e49a52478ab3\"))\n", + "stopifnot(\"length of round(answer3, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer3, 2))), \"6a21c\")), \"f1a7515d3d2d2fa10482af081e950276\"))\n", + "stopifnot(\"values of round(answer3, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer3, 2), 2))), \"6a21c\")), \"08e6448336d1a3eee509e49a52478ab3\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b1f9aa206f133969fcff7bf2009bbd0e", + "grade": false, + "grade_id": "cell-0c2107e7f52efb3d", + "locked": true, + 
"schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 4.0**\n", + "
{points: 1}\n", + "\n", + "For this question, let's instead predict the race time for a runner whose maximum training distance is 100 miles per week, based on the 2 closest neighbors.\n", + "\n", + "*Assign your answer to an object named `answer4`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3371f7bb2081a1f1eb48c702eebd05b6", + "grade": false, + "grade_id": "cell-44994bbd299ff0d0", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "cf11ee56927a5f67cce41fab07df1be4", + "grade": true, + "grade_id": "cell-690e322810064165", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer4, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer4, 2))), \"9a73b\")), \"a47e4de9e1b3b13d1772b79fd8689fa1\"))\n", + "stopifnot(\"value of round(answer4, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(answer4, 2), 2)), \"9a73b\")), \"26bd1aeeaa56c6025f423f5b2763517d\"))\n", + "stopifnot(\"length of round(answer4, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer4, 2))), \"9a73b\")), \"f3f027e877cfe92c5d8d7ec45f5ba816\"))\n", + "stopifnot(\"values of round(answer4, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer4, 2), 2))), \"9a73b\")), \"26bd1aeeaa56c6025f423f5b2763517d\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + 
"checksum": "a7b5791bb4f841adb405ff5f20f2fc83", + "grade": false, + "grade_id": "cell-3311ba44befa3767", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 5.0** Multiple Choice:\n", + "
{points: 1}\n", + "\n", + "So far you have calculated the $k$ nearest neighbors predictions manually based on $k$'s we have told you to use. However, last week we learned how to use a better method to choose the best $k$ for classification. \n", + "\n", + "Based on what you learned last week and what you have learned about $k$-nn regression so far this week, which method would you use to choose the $k$ (in the situation where we don't tell you which $k$ to use)?\n", + "\n", + "- A) Choose the $k$ that excludes most outliers\n", + "- B) Choose the $k$ with the lowest training error\n", + "- C) Choose the $k$ with the lowest cross-validation error\n", + "- D) Choose the $k$ that includes the most data points\n", + "- E) Choose the $k$ with the lowest testing error\n", + "\n", + "*Assign your answer to an object called `answer5`. Make sure your answer is an uppercase letter and is surrounded by quotation marks (e.g. `\"F\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fe8b9cbbbd4aa881d0e9ab40c0a0b96e", + "grade": false, + "grade_id": "cell-01b498d6b1415bf5", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "58622171f4b7eebca91c18d83f42b6c2", + "grade": true, + "grade_id": "cell-7cbe965dbdb7228d", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer5 is not character\"= setequal(digest(paste(toString(class(answer5)), \"37693\")), \"fc6c3130145f0fb80210523789d385c4\"))\n", + "stopifnot(\"length of answer5 is not correct\"= 
setequal(digest(paste(toString(length(answer5)), \"37693\")), \"2b4eb9a0e93cd3a15acd636e0a9e8a04\"))\n", + "stopifnot(\"value of answer5 is not correct\"= setequal(digest(paste(toString(tolower(answer5)), \"37693\")), \"e83660aa1020beda884cddb739a36f9e\"))\n", + "stopifnot(\"letters in string value of answer5 are correct but case is not correct\"= setequal(digest(paste(toString(answer5), \"37693\")), \"5c12e05e798903dd9637c879b3219bb2\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1ac1a0e15ea3ffa70b2260b0aa682b40", + "grade": false, + "grade_id": "cell-b3b34499fe540e49", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 6.0**\n", + "
{points: 1}\n", + "\n", + "We have just seen how to perform $k$-nn regression manually; now we will apply it to the whole dataset using the `tidymodels` package. To do so, we will first need to create the training and testing datasets. Split the data using *75%* of the `marathon` data as your training set and set `time_hrs` as the `strata` argument. Store this data into an object called `marathon_split`. \n", + "\n", + "Then, use the appropriate `training` and `testing` functions to create your training set which you will call `marathon_training` and your testing set which you will call `marathon_testing`. Remember we won't touch the test dataset until the end. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f1bfffda9ee38c9d79c21ae78613aac4", + "grade": false, + "grade_id": "cell-8257a9338314019e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2000) ### DO NOT CHANGE\n", + "\n", + "#... <- initial_split(..., prop = ..., strata = ...)\n", + "#... <- training(...)\n", + "#... 
<- testing(...)\n", + "\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b98ad482754c638881e3c0db12763b9e", + "grade": true, + "grade_id": "cell-381c8aeacf15fd35", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of 'rsplit' %in% class(marathon_split) is not logical\"= setequal(digest(paste(toString(class('rsplit' %in% class(marathon_split))), \"995eb\")), \"c37a6a3d32ece56dbf2983b316d56f96\"))\n", + "stopifnot(\"logical value of 'rsplit' %in% class(marathon_split) is not correct\"= setequal(digest(paste(toString('rsplit' %in% class(marathon_split)), \"995eb\")), \"76c52a1e5ab0897e6e3a3035b2593922\"))\n", + "\n", + "stopifnot(\"marathon_training should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_training)), \"995ec\")), \"501a47a75b2b457beff8e2ac89a8324c\"))\n", + "stopifnot(\"dimensions of marathon_training are not correct\"= setequal(digest(paste(toString(dim(marathon_training)), \"995ec\")), \"ad2869e2d34a6446cea1f65866f28007\"))\n", + "stopifnot(\"column names of marathon_training are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_training))), \"995ec\")), \"63ef62bbcc5de6b68df5d6cb8caf22f4\"))\n", + "stopifnot(\"types of columns in marathon_training are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_training, class)))), \"995ec\")), \"a5ba97618641aaa7581f575a22d28ebb\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.numeric))) sort(round(sapply(marathon_training[, sapply(marathon_training, is.numeric)], sum, na.rm = TRUE), 2)) else 
0), \"995ec\")), \"ed3dc19b532630b6bee7218c98fe8eaa\"))\n", + "stopifnot(\"values in one or more character columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.character))) sum(sapply(marathon_training[sapply(marathon_training, is.character)], function(x) length(unique(x)))) else 0), \"995ec\")), \"63f3d6429054bdfc33cca12ddf1b6add\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.factor))) sum(sapply(marathon_training[, sapply(marathon_training, is.factor)], function(col) length(unique(col)))) else 0), \"995ec\")), \"63f3d6429054bdfc33cca12ddf1b6add\"))\n", + "\n", + "stopifnot(\"marathon_testing should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_testing)), \"995ed\")), \"368652787cd859898436316bcd337f05\"))\n", + "stopifnot(\"dimensions of marathon_testing are not correct\"= setequal(digest(paste(toString(dim(marathon_testing)), \"995ed\")), \"1de930989154e3098eadae58a71a200c\"))\n", + "stopifnot(\"column names of marathon_testing are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_testing))), \"995ed\")), \"3219387bce679fc5b3707d5e9f8a69aa\"))\n", + "stopifnot(\"types of columns in marathon_testing are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_testing, class)))), \"995ed\")), \"82f32f5c3c8d2ba42979f74327ea0d94\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.numeric))) sort(round(sapply(marathon_testing[, sapply(marathon_testing, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"995ed\")), \"17a9b62568b0ae6c109f07cb1e94c751\"))\n", + "stopifnot(\"values in one or more character columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if 
(any(sapply(marathon_testing, is.character))) sum(sapply(marathon_testing[sapply(marathon_testing, is.character)], function(x) length(unique(x)))) else 0), \"995ed\")), \"da4b3b1d7a3eb9fbf3c9b437f2cb2306\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.factor))) sum(sapply(marathon_testing[, sapply(marathon_testing, is.factor)], function(col) length(unique(col)))) else 0), \"995ed\")), \"da4b3b1d7a3eb9fbf3c9b437f2cb2306\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f592eb9ca03dafc6d078406ffa752028", + "grade": false, + "grade_id": "cell-0c01c0b6cf4d8e91", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 7.0**\n", + "
{points: 1}\n", + "\n", + "Next, we’ll use cross-validation on our **training data** to choose $k$. In $k$-nn classification, we used accuracy to see how well our predictions matched the true labels. In the context of $k$-nn *regression*, we will use the root mean square prediction error (RMSPE) instead. Interpreting the RMSPE value can be tricky but generally speaking, if the prediction values are very close to the true values, the RMSPE will be small. Conversely, if the prediction values are *not* very close to the true values, the RMSPE will be quite large. \n", + "\n", + "Let's perform a cross-validation and choose the optimal $k$. First, create a model specification for $k$-nn. We are still using the $k$-nearest neighbours algorithm, and so we will still use the same package for the model engine as we did in classification (`\"kknn\"`). As usual, specify that we want to use the *straight-line distance*. However, since this will be a regression problem, we will use `set_mode(\"regression\")` in the model specification. Store your model specification in an object called `marathon_spec`. \n", + "\n", + "Moreover, create a recipe to preprocess our data. Store your recipe in an object called `marathon_recipe`. The recipe should specify that the response variable is race time (hrs) and the predictor is maximum distance run per week during training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "31868ce196b9c3e528132deaef193f83", + "grade": false, + "grade_id": "cell-766f1e094dd32efc", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(1234) #DO NOT REMOVE\n", + "\n", + "#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |> \n", + "# set_engine(...) |>\n", + "# set_mode(...) \n", + "\n", + "#... <- recipe(... ~ ..., data = ...) |>\n", + "# step_scale(...) 
|>\n", + "# step_center(...)\n", + "# \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_recipe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a48db7525cb8d5d744a96a5e697b7e9c", + "grade": true, + "grade_id": "cell-289bc7a299c4f482", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(marathon_spec)), \"5547\")), \"8ab83844edd34a74021cc1e79fa8cd3e\"))\n", + "stopifnot(\"model specification in marathon_spec is not correct\"= setequal(digest(paste(toString(marathon_spec$mode), \"5547\")), \"b1f1ba983a6682171a6e25213f46a7ec\"))\n", + "stopifnot(\"computational engine in marathon_spec is not correct\"= setequal(digest(paste(toString(marathon_spec$engine), \"5547\")), \"ca33af34f570b911430bce17b5d274b4\"))\n", + "stopifnot(\"weight function in marathon_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_spec$args$weight_func)), \"5547\")), \"6a7d1a8c96c075f8ca2b992a6e40a384\"))\n", + "stopifnot(\"number of neighbours in marathon_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_spec$args$neighbors)), \"5547\")), \"7367907047c067710c11a10603159af5\"))\n", + "\n", + "stopifnot(\"marathon_recipe should be a recipe\"= setequal(digest(paste(toString('recipe' %in% class(marathon_recipe)), \"5548\")), \"dafc3ea33a8e76c60aa970ef5a81ceca\"))\n", + "stopifnot(\"response variable of marathon_recipe is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_recipe$var_info, role == 'outcome')$variable)), \"5548\")), \"6161ba47763d24154550d947a5e1ad8d\"))\n", + "stopifnot(\"predictor variable(s) of marathon_recipe are not correct\"= 
setequal(digest(paste(toString(sort(filter(marathon_recipe$var_info, role == 'predictor')$variable)), \"5548\")), \"f9faad8a2551549f2fad9412027cc626\"))\n", + "stopifnot(\"marathon_recipe does not contain the correct data, might need to be standardized\"= setequal(digest(paste(toString(round(sum(bake(prep(marathon_recipe), marathon_recipe$template) %>% select_if(is.numeric), na.rm = TRUE), 2)), \"5548\")), \"90fd9c3ee65ef9b8152e366cdf2e0feb\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b591bde125171c97afe36a1a6c5a527b", + "grade": false, + "grade_id": "cell-9bbc49f6977aa3fc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 7.1**\n", + "
{points: 1}\n", + "\n", + "Now, create the splits for cross-validation with *5 folds* using the `vfold_cv` function. Store your answer in an object called `marathon_vfold`. Make sure to set the `strata` argument.\n", + "\n", + "Then, use the `workflow` function to combine your model specification and recipe. Store your answer in an object called `marathon_workflow`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c551fd220ee0f022dd4d1aa49e0a74b9", + "grade": false, + "grade_id": "cell-7cf222d5ae9a2f89", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(1234) # DO NOT REMOVE\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "71e14b2b9fcdc15f2d8cb79ad3b1d5a0", + "grade": true, + "grade_id": "cell-88d9f578265d6294", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_vfold should be a cross validation object\"= setequal(digest(paste(toString('vfold_cv' %in% class(marathon_vfold)), \"ab229\")), \"c3bddbd841824b3cf5b9b336372855ed\"))\n", + "stopifnot(\"number of folds is not correct\"= setequal(digest(paste(toString(length(marathon_vfold$id)), \"ab229\")), \"55c11bef663fb02f72bfcf7930692743\"))\n", + "stopifnot(\"data used is not correct\"= setequal(digest(paste(toString(dim(marathon_vfold)), \"ab229\")), \"c7bcbe7a4a4fbb68b454853bb1e50224\"))\n", + "stopifnot(\"training data is not used\"= setequal(digest(paste(toString(if (any(sapply(marathon_vfold$splits[[1]]$data, is.numeric))) 
{round(sapply(marathon_vfold$splits[[1]]$data[, sapply(marathon_vfold$splits[[1]]$data, is.numeric)], sum, na.rm = TRUE), 2)}), \"ab229\")), \"6b9c2e796cadf9494d1d87b4e1c00c59\"))\n", + "stopifnot(\"strata argument is not correct\"= setequal(digest(paste(toString(sapply(seq_along(marathon_vfold$splits), function(i) {round(sum(marathon_vfold$splits[[i]]$in_id), 2)})), \"ab229\")), \"b4f23b7d7a8ecce1e7d5a731d02797ce\"))\n", + "\n", + "stopifnot(\"marathon_workflow should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(marathon_workflow)), \"ab22a\")), \"8d7b4e8c4a7419dbffe358719d5d9bb9\"))\n", + "stopifnot(\"computational engine used in marathon_workflow is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$actions$model$spec$engine), \"ab22a\")), \"de5ef4970762f2d63df8ccc9ad6aaf1b\"))\n", + "stopifnot(\"model specification used in marathon_workflow is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$actions$model$spec$mode), \"ab22a\")), \"dcb0e951e2b9778ffc9d472a93e87e47\"))\n", + "stopifnot(\"marathon_workflow must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(marathon_workflow$trained), \"ab22a\")), \"95e32306c93c330c5d5720f91540b6bd\"))\n", + "stopifnot(\"predictor variable(s) of marathon_workflow are not correct\"= setequal(digest(paste(toString(sort(filter(marathon_workflow$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"ab22a\")), \"8bd9531610d94899e636e21a69c93095\"))\n", + "stopifnot(\"marathon_workflow does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(marathon_workflow$pre$mold$predictors[, sapply(marathon_workflow$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"did not fit marathon_workflow on the training dataset\"= 
setequal(digest(paste(toString(nrow(marathon_workflow$pre$mold$outcomes)), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"for classification/regression models, weight function is not correct\"= setequal(digest(paste(toString(quo_name(marathon_workflow$fit$actions$model$spec$args$weight_func)), \"ab22a\")), \"a84b6b776aa8868e397d0f7942c84d8e\"))\n", + "stopifnot(\"for classification/regression models, response variable of marathon_workflow is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_workflow$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"ab22a\")), \"802f4cdc88968d5e300ba52cf0b5310f\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(marathon_workflow$fit$actions$model$spec$args$neighbors)), \"ab22a\")), \"011cdad6fa1b1ee0c92e3621303b70ed\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$fit$fit$cluster), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if (!is.null(marathon_workflow$fit$fit$fit$tot.withinss)) round(marathon_workflow$fit$fit$fit$tot.withinss, 2) else NULL), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "06d081b570a555d6fdd7bddae615221f", + "grade": false, + "grade_id": "cell-7cd5193eecc27c30", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.0**\n", + "
{points: 1}\n", + "\n", + "If you haven't noticed by now, the major difference compared to other workflows from Chapters 6 and 7 is that we are running a *regression* rather than a *classification*. Specifying *regression* in the `set_mode` function essentially tells `tidymodels` that we need to use different metrics (RMSPE rather than accuracy) for tuning and evaluation. \n", + "\n", + "Now, let's use the RMSPE to find the best setting for $k$ from our workflow. Let's test the values $k = 1, 11, 21, 31, ..., 81$.\n", + "\n", + "First, create a tibble with a column called `neighbors` that contains the sequence of values. Remember that you should use the `seq` function to create the range of $k$s that goes *from* 1 *to* 81 *by jumps of* 10. Assign that tibble to an object called `gridvals`. \n", + "\n", + "Next, tune your workflow such that it tests all the values in `gridvals` and *resamples* using your cross-validation data set. Finally, collect the statistics from your model. Assign your answer to an object called `marathon_results`.\n", + "\n", + "*Note: For this question, do not apply any filters to the metrics. 
Print all metrics exactly as returned by the function.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "79b57741a3f0d43055e78fc0f333749c", + "grade": false, + "grade_id": "cell-8c7e7ded673d28ec", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2019) # DO NOT CHANGE\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "257bf5db6b9b79ff1520b860c8e3f888", + "grade": true, + "grade_id": "cell-29d608ebfdd11366", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"gridvals should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(gridvals)), \"9e09\")), \"e3785f29ad31e6528294ba45bd185310\"))\n", + "stopifnot(\"dimensions of gridvals are not correct\"= setequal(digest(paste(toString(dim(gridvals)), \"9e09\")), \"74028ed922d45a6a780d2f5306e7d24e\"))\n", + "stopifnot(\"column names of gridvals are not correct\"= setequal(digest(paste(toString(sort(colnames(gridvals))), \"9e09\")), \"14b2d11c9f26fcd55788479cedad5474\"))\n", + "stopifnot(\"types of columns in gridvals are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(gridvals, class)))), \"9e09\")), \"e36ee1654fef221fccb6f406dedeef5e\"))\n", + "stopifnot(\"values in one or more numerical columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.numeric))) sort(round(sapply(gridvals[, sapply(gridvals, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e09\")), \"143d0c8eb4893dcebe4b08f79c11636c\"))\n", + "stopifnot(\"values in one or more character 
columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.character))) sum(sapply(gridvals[sapply(gridvals, is.character)], function(x) length(unique(x)))) else 0), \"9e09\")), \"78cc6b154549e7981035d4e9982eee20\"))\n", + "stopifnot(\"values in one or more factor columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.factor))) sum(sapply(gridvals[, sapply(gridvals, is.factor)], function(col) length(unique(col)))) else 0), \"9e09\")), \"78cc6b154549e7981035d4e9982eee20\"))\n", + "\n", + "stopifnot(\"marathon_results should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_results)), \"9e0a\")), \"df7374252434fb128d5f92ddd0f81b58\"))\n", + "stopifnot(\"dimensions of marathon_results are not correct\"= setequal(digest(paste(toString(dim(marathon_results)), \"9e0a\")), \"4b785364ac3b2b46c601276c8462a845\"))\n", + "stopifnot(\"column names of marathon_results are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_results))), \"9e0a\")), \"7855f48ba3ceba568928de2dcac36b0d\"))\n", + "stopifnot(\"types of columns in marathon_results are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_results, class)))), \"9e0a\")), \"5d450389a632649592270bf8862bd544\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.numeric))) sort(round(sapply(marathon_results[, sapply(marathon_results, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e0a\")), \"93540c0b281129461265ef8b0b6c48be\"))\n", + "stopifnot(\"values in one or more character columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.character))) sum(sapply(marathon_results[sapply(marathon_results, is.character)], function(x) length(unique(x)))) else 0), \"9e0a\")), 
\"ea80b90301e9ed819d58d7909bfeead0\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.factor))) sum(sapply(marathon_results[, sapply(marathon_results, is.factor)], function(col) length(unique(col)))) else 0), \"9e0a\")), \"f6143329c548deabbac24c3034b08ebf\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ffc7314876aa53957c34b549d51a8367", + "grade": false, + "grade_id": "cell-35eaea9c384c5f6c", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.1**\n", + "
{points: 1}\n", + "\n", + "Great! Now find the *minimum* RMSPE along with its associated metrics such as the mean and standard error, to help us find the number of neighbors that will serve as our best $k$ value. Your answer should simply be a tibble with one row. Assign your answer to an object called `marathon_min`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "99a6de5bf9d8d8c29eb0df4704b4e848", + "grade": false, + "grade_id": "cell-f9738d45da9e8a77", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2020) # DO NOT REMOVE\n", + "\n", + "#... <- marathon_results |>\n", + "# filter(.metric == ...) |>\n", + "# slice_min(..., ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_min" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "236379001bc83be21ff82da0403ca93a", + "grade": true, + "grade_id": "cell-a1e34e1fb62d79f5", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_min should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_min)), \"33a4\")), \"40b5e33777ec7eb065f043fa71f535a6\"))\n", + "stopifnot(\"dimensions of marathon_min are not correct\"= setequal(digest(paste(toString(dim(marathon_min)), \"33a4\")), \"38b61400a91856750685827d24948776\"))\n", + "stopifnot(\"column names of marathon_min are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_min))), \"33a4\")), \"e6b0259c3aaa121f0abdc46cafe6e2c2\"))\n", + "stopifnot(\"types of columns in marathon_min are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_min, class)))), 
\"33a4\")), \"991d6e89327c147fc4962bd227ea29ba\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.numeric))) sort(round(sapply(marathon_min[, sapply(marathon_min, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"33a4\")), \"0b8ad5b8e14a65e2bdb202f878d3aa33\"))\n", + "stopifnot(\"values in one or more character columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.character))) sum(sapply(marathon_min[sapply(marathon_min, is.character)], function(x) length(unique(x)))) else 0), \"33a4\")), \"066a3d400801f50d6cfbbb54e8a91f62\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.factor))) sum(sapply(marathon_min[, sapply(marathon_min, is.factor)], function(col) length(unique(col)))) else 0), \"33a4\")), \"d9bdf2992b7d868327011d545b756c8c\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "059df927a68d576994b89a53009dd594", + "grade": false, + "grade_id": "cell-28a414eb59f5ae20", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.2**\n", + "
{points: 1}\n", + "\n", + "To assess how well our model might do at predicting on unseen data, we will assess its RMSPE on the test data. To do this, we will first re-train our $k$-nn regression model on the entire training data set, using the $k$ value we obtained from **Question 8.1**. \n", + "\n", + "To start, pull the best `neighbors` value from `marathon_min` and store it in an object called `k_min`. \n", + "\n", + "Following that, we will repeat the workflow analysis again but with a brand new model specification with `k_min`. Remember, we are doing a regression analysis, so please select the appropriate mode. Store your new model specification in an object called `marathon_best_spec` and your new workflow analysis in an object called `marathon_best_fit`. You can reuse the existing `marathon_recipe` for this workflow, as we do not need to change it for this task.\n", + "\n", + "Then, we will use the `predict` function to make predictions on the test data, and use the `metrics` function again to compute a summary of the regression's quality. Store your answer in an object called `marathon_summary`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8d9c278960e85de2b5b718d977d715cd", + "grade": false, + "grade_id": "cell-a4de6046a0bd5f96", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(1234) # DO NOT REMOVE\n", + "\n", + "#... <- marathon_min |>\n", + "# pull(...)\n", + "\n", + "#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |>\n", + "# set_engine(...) |>\n", + "# set_mode(...)\n", + "\n", + "#... <- workflow() |>\n", + "# add_recipe(...) |>\n", + "# add_model(...) |>\n", + "# fit(data = ...)\n", + "\n", + "#... <- marathon_best_fit |>\n", + "# predict(...) |>\n", + "# bind_cols(...) 
|>\n", + "# metrics(truth = ..., estimate = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9059e615b6cdd649371f38389a634ab3", + "grade": true, + "grade_id": "cell-94fab75dca459b65", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of k_min is not numeric\"= setequal(digest(paste(toString(class(k_min)), \"550f6\")), \"01c662532c057a22b2b9cb927ea3aa3f\"))\n", + "stopifnot(\"value of k_min is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(k_min, 2)), \"550f6\")), \"5b34b812d262ed0bf637355b4e5408ce\"))\n", + "stopifnot(\"length of k_min is not correct\"= setequal(digest(paste(toString(length(k_min)), \"550f6\")), \"f23949a15452933ed5d3171f2d85ddf5\"))\n", + "stopifnot(\"values of k_min are not correct\"= setequal(digest(paste(toString(sort(round(k_min, 2))), \"550f6\")), \"5b34b812d262ed0bf637355b4e5408ce\"))\n", + "\n", + "stopifnot(\"marathon_best_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(marathon_best_spec)), \"550f7\")), \"71cbb182aea5ad1ec22f3e1d3149f5a7\"))\n", + "stopifnot(\"model specification in marathon_best_spec is not correct\"= setequal(digest(paste(toString(marathon_best_spec$mode), \"550f7\")), \"ba973cd2e3bbdcb580ce21682cbbed34\"))\n", + "stopifnot(\"computational engine in marathon_best_spec is not correct\"= setequal(digest(paste(toString(marathon_best_spec$engine), \"550f7\")), \"03bd61c48336305c8fe6492179a01fa5\"))\n", + "stopifnot(\"weight function in marathon_best_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_spec$args$weight_func)), \"550f7\")), 
\"14ab6f06ac87b4cdfc2197cf474d4fe5\"))\n", + "stopifnot(\"number of neighbours in marathon_best_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_spec$args$neighbors)), \"550f7\")), \"fec54ae3124ff36fc32e618112b69461\"))\n", + "\n", + "stopifnot(\"marathon_best_fit should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(marathon_best_fit)), \"550f8\")), \"c278370a8060d698d5d49b79c4d8b9f7\"))\n", + "stopifnot(\"computational engine used in marathon_best_fit is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$actions$model$spec$engine), \"550f8\")), \"9a4ca5e5b6ba68241af1ae159b63e8c6\"))\n", + "stopifnot(\"model specification used in marathon_best_fit is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$actions$model$spec$mode), \"550f8\")), \"9013407379761a62f44a4853cbf09d68\"))\n", + "stopifnot(\"marathon_best_fit must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(marathon_best_fit$trained), \"550f8\")), \"c278370a8060d698d5d49b79c4d8b9f7\"))\n", + "stopifnot(\"predictor variable(s) of marathon_best_fit are not correct\"= setequal(digest(paste(toString(sort(filter(marathon_best_fit$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"550f8\")), \"0bb1a7322ed0ac79cbb99ece1bed5fe8\"))\n", + "stopifnot(\"marathon_best_fit does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(marathon_best_fit$pre$mold$predictors[, sapply(marathon_best_fit$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"550f8\")), \"5996592f0d4539ed7cdde9924dd1c180\"))\n", + "stopifnot(\"did not fit marathon_best_fit on the training dataset\"= setequal(digest(paste(toString(nrow(marathon_best_fit$pre$mold$outcomes)), \"550f8\")), \"f77ac93555ebf5acd29489e116cfe1f3\"))\n", + "stopifnot(\"for classification/regression models, weight function is not 
correct\"= setequal(digest(paste(toString(quo_name(marathon_best_fit$fit$actions$model$spec$args$weight_func)), \"550f8\")), \"38d7480d57d211b785f6957842d1cc37\"))\n", + "stopifnot(\"for classification/regression models, response variable of marathon_best_fit is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_best_fit$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"550f8\")), \"1b3320cc17789ad0a707b28c1833b4ba\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_fit$fit$actions$model$spec$args$neighbors)), \"550f8\")), \"6c7d30dbf17d893256cca52b8803fdbc\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$fit$fit$cluster), \"550f8\")), \"a3b5926535d0e0cb6c4b76c762df3a21\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if (!is.null(marathon_best_fit$fit$fit$fit$tot.withinss)) round(marathon_best_fit$fit$fit$fit$tot.withinss, 2) else NULL), \"550f8\")), \"a3b5926535d0e0cb6c4b76c762df3a21\"))\n", + "\n", + "stopifnot(\"marathon_summary should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_summary)), \"550f9\")), \"8510d8378e729a04fefcf81210f014ad\"))\n", + "stopifnot(\"dimensions of marathon_summary are not correct\"= setequal(digest(paste(toString(dim(marathon_summary)), \"550f9\")), \"08fa1b5bd8cded25763a7850d373ec09\"))\n", + "stopifnot(\"column names of marathon_summary are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_summary))), \"550f9\")), \"8d5530627575e8e898130cc2baea6b47\"))\n", + "stopifnot(\"types of columns in marathon_summary are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_summary, class)))), \"550f9\")), \"2b01203f0e2844f3ba994d132f86283f\"))\n", + "stopifnot(\"values in one 
or more numerical columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.numeric))) sort(round(sapply(marathon_summary[, sapply(marathon_summary, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"550f9\")), \"490760f22419c4660094e8f16d1c9718\"))\n", + "stopifnot(\"values in one or more character columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.character))) sum(sapply(marathon_summary[sapply(marathon_summary, is.character)], function(x) length(unique(x)))) else 0), \"550f9\")), \"0b83a45fc9152b6322604a7124e0f87d\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.factor))) sum(sapply(marathon_summary[, sapply(marathon_summary, is.factor)], function(col) length(unique(col)))) else 0), \"550f9\")), \"c67c2b0da350f0e27a9914f9e81220ab\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1feec2b034696ca26148ef224854254e", + "grade": false, + "grade_id": "cell-dffef3173aff9b72", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "What does this RMSPE score mean? RMSPE is measured in the units of the target/response variable, so it can sometimes be a bit hard to interpret. But in this case, we know that a typical marathon race time is somewhere between 3 - 5 hours. So this model allows us to predict a runner's race time up to about +/-0.6 of an hour, or +/- 36 minutes. This is not *fantastic*, but not *terrible* either. 
We can certainly use the model to determine roughly whether an athlete will have a bad, good, or excellent race time, but probably cannot reliably distinguish between athletes of a similar caliber.\n", + "\n", + "For now, let’s consider this approach to thinking about RMSPE from our testing data set: as long as it’s not significantly worse than the cross-validation RMSPE of our best model (**Question 8.1**), then we can say that we’re not doing too much worse on the test data than we did on the training data. In future courses on statistical/machine learning, you will learn more about how to interpret RMSPE from testing data and other ways to assess models. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "553011861ab39af59d82c7ff28c78d80", + "grade": false, + "grade_id": "cell-ed97bb769cc923e5", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 8.3** True or False:\n", + "
{points: 1}\n", + "\n", + "The RMSPE from our testing data set is *much worse* than the cross-validation RMSPE of our best model. \n", + "\n", + "*Assign your answer to an object named `answer8.3`. Make sure your answer is in lowercase and is surrounded by quotation marks (e.g. `\"true\"` or `\"false\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "57ecd2f6ac40fa40cd040ffc75fc7ce5", + "grade": false, + "grade_id": "cell-24ccc13552b0fff1", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "099fe7f9c58ee09e8ce13f4795801881", + "grade": true, + "grade_id": "cell-f39ed1223c189ec6", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer8.3 is not character\"= setequal(digest(paste(toString(class(answer8.3)), \"aa898\")), \"fdfbb8dbf9e39738698be2e00f17a87d\"))\n", + "stopifnot(\"length of answer8.3 is not correct\"= setequal(digest(paste(toString(length(answer8.3)), \"aa898\")), \"a40dcc06a77707553cc5d3e1acdc02dd\"))\n", + "stopifnot(\"value of answer8.3 is not correct\"= setequal(digest(paste(toString(tolower(answer8.3)), \"aa898\")), \"3463dffe41b3e1babe842b2dfef22ebe\"))\n", + "stopifnot(\"letters in string value of answer8.3 are correct but case is not correct\"= setequal(digest(paste(toString(answer8.3), \"aa898\")), \"3463dffe41b3e1babe842b2dfef22ebe\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { +
"cell_type": "markdown", + "checksum": "35a07e04ba2aae150c37aad410e9c317", + "grade": false, + "grade_id": "cell-f527fba0cc1ca89d", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 9.0**\n", + "
{points: 1}\n", + "\n", + "Let's visualize what the relationship between `max` and `time_hrs` looks like with our best $k$ value to ultimately explore how the $k$ value affects $k$-nn regression.\n", + "\n", + "To do so, use the `predict` function on the workflow analysis that utilizes the best $k$ value (`marathon_best_fit`) to create predictions for the `marathon_training` data. Then, add the column of predictions to the `marathon_training` data frame using `bind_cols`. Name the resulting data frame `marathon_preds`.\n", + "\n", + "Next, create a scatterplot with the maximum distance run per week against the marathon time from `marathon_preds`. Assign your plot to an object called `marathon_plot`. **Plot the predictions as a blue line over the data points.** Remember the fundamentals of effective visualizations such as having a **title** and **human-readable axes**. \n", + "\n", + "*Note: use `geom_point` before `geom_line` when creating the plot to ensure tests pass!*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "dd315f90974d6f742929f677f91a1807", + "grade": false, + "grade_id": "cell-e623a65f902a7e98", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2019) # DO NOT CHANGE\n", + "\n", + "options(repr.plot.width = 7, repr.plot.height = 7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "742eba48e65439750fb6ae57132c6288", + "grade": true, + "grade_id": "cell-5eff9c974a058bdf", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of plot is not correct
(if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(marathon_plot$layers)), function(i) {c(class(marathon_plot$layers[[i]]$geom))[1]})), \"b9743\")), \"d56026378896396ba628c5624640af83\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_plot$layers)), function(i) {rlang::get_expr(c(marathon_plot$layers[[i]]$mapping, marathon_plot$mapping)$x)}), as.character))), \"b9743\")), \"a7b5ce253cd34ddb441f3328ae587132\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_plot$layers)), function(i) {rlang::get_expr(c(marathon_plot$layers[[i]]$mapping, marathon_plot$mapping)$y)}), as.character))), \"b9743\")), \"df7f01f9cbc56804207c66b99869848c\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$x)!= marathon_plot$labels$x), \"b9743\")), \"626430ae9d9877f3617df01c7f947285\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$y)!= marathon_plot$labels$y), \"b9743\")), \"626430ae9d9877f3617df01c7f947285\"))\n", + "stopifnot(\"incorrect colour variable in marathon_plot, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$colour)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"incorrect shape variable in marathon_plot, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$shape)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + 
"stopifnot(\"the colour label in marathon_plot is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$colour) != marathon_plot$labels$colour), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"the shape label in marathon_plot is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$shape) != marathon_plot$labels$shape), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"fill variable in marathon_plot is not correct\"= setequal(digest(paste(toString(quo_name(marathon_plot$mapping$fill)), \"b9743\")), \"030c9e71d2ec4fea75b9143c74f57384\"))\n", + "stopifnot(\"fill label in marathon_plot is not informative\"= setequal(digest(paste(toString((quo_name(marathon_plot$mapping$fill) != marathon_plot$labels$fill)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"position argument in marathon_plot is not correct\"= setequal(digest(paste(toString(class(marathon_plot$layers[[1]]$position)[1]), \"b9743\")), \"5080a849493eb59c49af507ee557edd2\"))\n", + "\n", + "stopifnot(\"marathon_plot$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_plot$data)), \"b9744\")), \"ff94263ebc353ae0e93a3610dae0c0c7\"))\n", + "stopifnot(\"dimensions of marathon_plot$data are not correct\"= setequal(digest(paste(toString(dim(marathon_plot$data)), \"b9744\")), \"bdb3587ec0317d8baf1dac70b56564a9\"))\n", + "stopifnot(\"column names of marathon_plot$data are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_plot$data))), \"b9744\")), \"f908eda84c0f11bb1c252c99eb5de321\"))\n", + "stopifnot(\"types of columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_plot$data, class)))), \"b9744\")), 
\"5f21a4c36da13ec62b20ea7ebd9c6380\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.numeric))) sort(round(sapply(marathon_plot$data[, sapply(marathon_plot$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"b9744\")), \"1e47fdb80dba2897519f48a743bcfcd7\"))\n", + "stopifnot(\"values in one or more character columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.character))) sum(sapply(marathon_plot$data[sapply(marathon_plot$data, is.character)], function(x) length(unique(x)))) else 0), \"b9744\")), \"d6d21b5fdf47ab635b546662cb873731\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.factor))) sum(sapply(marathon_plot$data[, sapply(marathon_plot$data, is.factor)], function(col) length(unique(col)))) else 0), \"b9744\")), \"d6d21b5fdf47ab635b546662cb873731\"))\n", + "\n", + "stopifnot(\"type of is.character(marathon_plot$labels$title) is not logical\"= setequal(digest(paste(toString(class(is.character(marathon_plot$labels$title))), \"b9745\")), \"5f2cdb5e0db0082e3ba2c51886d533f9\"))\n", + "stopifnot(\"logical value of is.character(marathon_plot$labels$title) is not correct\"= setequal(digest(paste(toString(is.character(marathon_plot$labels$title)), \"b9745\")), \"a2d537ac5f121c20197a0b3c01f19b6a\"))\n", + "\n", + "stopifnot(\"type of as.character(marathon_plot$layers[[2]]$aes_params) is not character\"= setequal(digest(paste(toString(class(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), \"2837cd9a472e7f8bf026f283bcdfeb4b\"))\n", + "stopifnot(\"length of as.character(marathon_plot$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(length(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), 
\"4721e09e2f46499eda0e022ab36a0863\"))\n", + "stopifnot(\"value of as.character(marathon_plot$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(tolower(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), \"f748724dc18e96f9604fe136f112f1ca\"))\n", + "stopifnot(\"letters in string value of as.character(marathon_plot$layers[[2]]$aes_params) are correct but case is not correct\"= setequal(digest(paste(toString(as.character(marathon_plot$layers[[2]]$aes_params)), \"b9746\")), \"f748724dc18e96f9604fe136f112f1ca\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "source('cleanup.R')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/materials/R/worksheet_regression1/cleanup.R b/materials/R/worksheet_regression1/cleanup.R new file mode 100644 index 0000000..5ccfed4 --- /dev/null +++ b/materials/R/worksheet_regression1/cleanup.R @@ -0,0 +1,2 @@ +# clean up data files that students output + diff --git a/materials/R/worksheet_regression1/data/marathon.csv b/materials/R/worksheet_regression1/data/marathon.csv new file mode 100644 index 0000000..d509d97 --- /dev/null +++ b/materials/R/worksheet_regression1/data/marathon.csv @@ -0,0 +1,930 @@ +age,bmi,female,footwear,group,injury,mf_d,mf_di,mf_ti,max,sprint,mf_s,time_hrs +35,23.5923233,0,2,1,2,42195,4,10295,60,1,4.098591549295775,2.8597222222222225 +33,22.51829529,0,2,2,2,42195,3,12292,50,0,3.432720468597462,3.4144444444444444 +38,25.56031227,0,2,3,1,42195,4,10980,65,0,3.842896174863388,3.05 +34,22.60793114,0,2,1,1,42195,3,10694,88,1,3.945670469422106,2.9705555555555554 
+39,24.97483635,0,2,1,1,42195,2,13452,51,0,3.136708296164139,3.7366666666666664 +33,24.30183029,1,2,2,1,42195,3,14940,40,0,2.82429718875502,4.15 +34,24.57002449,0,1,3,1,42195,3,10747,75,1,3.926211966130083,2.9852777777777777 +53,23.16774559,0,2,1,1,42195,3,10875,45,1,3.88,3.0208333333333335 +34,23.58257103,1,2,2,1,42195,3,16580,39,0,2.5449336550060315,4.605555555555555 +44,20.03506279,1,2,3,1,42195,2,15440,45,1,2.732836787564767,4.288888888888889 +27,29.88360977,0,1,1,1,42195,4,14430,28,1,2.924116424116424,4.008333333333334 +27,21.38561058,0,2,2,1,42195,2,9475,110,1,4.453298153034301,2.631944444444444 +39,29.73737335,0,2,1,2,42195,4,13113,52,0,3.2177991306337224,3.6425 +32,29.21779823,1,2,3,1,42195,2,17190,55,0,2.4546247818499127,4.775 +24,21.70493698,1,2,3,2,42195,2,12701,50,1,3.322179355956224,3.5280555555555555 +35,30.05259323,1,2,1,3,42195,3,22139,30,0,1.9059126428474638,6.1497222222222225 +63,24.48979568,0,2,2,2,42195,2,15172,30,0,2.7811099393619827,4.214444444444444 +47,24.55106735,0,2,3,2,42195,2,14416,52,1,2.9269561598224194,4.004444444444444 +28,18.86635017,1,2,3,1,42195,2,11707,62,0,3.6042538652088494,3.2519444444444447 +42,25.10309982,0,2,2,1,42195,3,14813,30,0,2.848511442651725,4.114722222222222 +58,26.4293766,0,1,3,1,42195,2,14071,70,0,2.9987207732215193,3.9086111111111115 +33,21.75764084,1,2,3,2,42195,3,13337,120,0,3.1637549673839693,3.7047222222222222 +33,22.51829529,0,1,3,1,42195,3,9508,135,1,4.437841817416912,2.641111111111111 +24,22.53944397,1,2,2,1,42195,4,15420,30,0,2.7363813229571985,4.283333333333333 +47,22.49134827,0,2,1,1,42195,4,12312,40,1,3.4271442495126707,3.42 +26,25.25252533,1,2,1,2,42195,2,16899,34,1,2.4968933072962898,4.694166666666666 +52,23.80480194,0,1,1,1,42195,4,13257,40,1,3.182846797918081,3.6824999999999997 +33,28.2003727,1,2,2,1,42195,3,18484,15,0,2.2827851114477387,5.134444444444444 +50,19.17613602,1,2,2,1,42195,2,10718,80,1,3.936835230453443,2.977222222222222 
+27,20.77414703,1,2,2,1,42195,2,12095,70,1,3.488631665977677,3.3597222222222225 +47,25.24751663,0,3,1,1,42195,2,10757,60,1,3.9225620526169007,2.9880555555555555 +28,20.91324425,1,1,1,1,42195,2,10888,86,1,3.875367376928729,3.0244444444444443 +37,27.4961338,0,2,3,1,42195,2,14700,42,0,2.870408163265306,4.083333333333333 +31,22.53840446,0,1,2,2,42195,3,12323,45,1,3.424085044226244,3.4230555555555555 +31,24.52615929,0,1,3,1,42195,2,10778,7,0,3.914919280014845,2.993888888888889 +25,24.95965385,1,1,2,1,42195,3,14985,45,1,2.815815815815816,4.1625 +28,19.92750168,1,2,3,3,42195,2,12151,55,0,3.4725536992840094,3.375277777777778 +43,25.24778557,0,2,2,1,42195,3,17400,36,0,2.425,4.833333333333333 +26,19.86567116,1,2,2,1,42195,2,10461,94,1,4.033553197591052,2.9058333333333333 +30,20.57555008,1,2,2,1,42195,4,12719,72,1,3.317477789134366,3.5330555555555554 +36,24.34137344,0,2,3,1,42195,3,13530,51,1,3.1186252771618626,3.7583333333333333 +34,27.51718903,0,2,2,1,42195,3,14939,43,0,2.824486244059174,4.149722222222222 +40,28.44444466,0,2,1,1,42195,4,11611,82,1,3.6340539143915254,3.225277777777778 +38,23.37423897,1,2,2,2,42195,3,13690,35,0,3.0821767713659605,3.8027777777777776 +37,23.84960938,0,2,3,1,42195,3,12566,42,1,3.3578704440553877,3.4905555555555554 +40,21.5213356,0,2,2,1,42195,3,10698,70,1,3.9441951766685364,2.9716666666666667 +33,21.3521862,0,2,1,1,42195,3,13040,40,1,3.235812883435583,3.6222222222222222 +54,21.70465088,0,2,1,1,42195,4,12641,52,0,3.3379479471560796,3.511388888888889 +38,23.80480194,0,1,3,1,42195,3,13860,50,0,3.044372294372294,3.85 +29,18.63140297,1,2,3,1,42195,4,13632,51,1,3.0952904929577465,3.7866666666666666 +31,24.37873268,0,1,3,1,42195,3,15420,34,1,2.7363813229571985,4.283333333333333 +48,28.05836296,0,2,2,1,42195,3,18300,30,0,2.305737704918033,5.083333333333333 +34,20.19801331,0,2,1,1,42195,3,11453,70,1,3.6841875491137692,3.181388888888889 +44,25.20478821,0,2,2,1,42195,4,13458,55,1,3.135309852875613,3.7383333333333337 
+64,24.81470108,0,2,1,1,42195,3,14807,60,1,2.849665698656041,4.1130555555555555 +55,25.11189079,0,2,1,1,42195,3,14188,50,1,2.973992106005075,3.9411111111111112 +28,19.66026878,0,2,1,2,42195,4,10080,70,0,4.186011904761905,2.8 +36,22.60793114,0,1,2,1,42195,3,10040,70,1,4.202689243027889,2.788888888888889 +29,27.19741821,1,2,1,2,42195,3,17341,44,0,2.4332506775849145,4.816944444444444 +28,24.34137344,0,2,3,2,42195,3,13840,35,1,3.048771676300578,3.844444444444444 +36,22.857143399999998,0,1,1,2,42195,4,10652,60,1,3.961227938415321,2.9588888888888887 +39,23.37423897,1,2,1,1,42195,3,13148,40,1,3.209233343474293,3.652222222222222 +27,20.20201874,1,2,2,2,42195,2,14100,50,1,2.9925531914893617,3.9166666666666665 +31,22.80591202,1,1,2,2,42195,3,15060,35,0,2.801792828685259,4.183333333333334 +34,23.80480194,1,2,2,1,42195,2,16050,35,1,2.6289719626168226,4.458333333333333 +60,23.87511635,0,2,1,1,42195,3,13767,42,0,3.0649378949662234,3.8241666666666663 +27,24.38188744,0,2,2,2,42195,3,15120,55,0,2.7906746031746033,4.2 +30,21.38588142,0,2,3,1,42195,2,10211,70,1,4.132308294976006,2.836388888888889 +60,24.37873268,0,2,1,1,42195,4,12480,55,1,3.3810096153846154,3.466666666666667 +38,22.37248611,1,2,1,2,42195,3,12420,60,1,3.3973429951690823,3.45 +35,28.25022507,0,1,1,1,42195,4,13147,52,0,3.2094774473263863,3.6519444444444447 +43,22.80591202,0,2,1,1,42195,3,11107,63,1,3.7989556135770237,3.085277777777778 +33,25.11189079,0,1,3,1,42195,4,11056,58,1,3.8164797395079595,3.071111111111111 +49,22.13814735,0,2,1,1,42195,2,14570,60,1,2.89601921757035,4.0472222222222225 +25,22.35768318,0,2,2,2,42195,2,9301,80,0,4.536608966777766,2.5836111111111113 +28,20.77922058,1,2,3,1,42195,2,10691,85,1,3.9467776634552427,2.9697222222222224 +39,21.70493698,1,2,2,1,42195,4,15971,38,0,2.641976081647987,4.436388888888889 +23,22.80591202,0,2,2,1,42195,3,10353,70,0,4.07563025210084,2.8758333333333335 +47,24.48979568,0,2,1,1,42195,3,12111,55,1,3.484022789199901,3.3641666666666667 
+32,29.34202003,0,2,1,1,42195,4,12600,38,1,3.348809523809524,3.5 +31,23.30241394,1,2,3,3,42195,3,12123,50,1,3.4805741153179905,3.3675 +43,25.23407936,0,2,2,1,42195,2,13800,52,1,3.0576086956521737,3.8333333333333335 +38,20.41903305,1,2,3,2,42195,2,13120,62,1,3.216082317073171,3.6444444444444444 +36,22.91344261,0,2,2,2,42195,2,12423,55,0,3.396522579087177,3.4508333333333336 +43,19.64085388,0,2,3,1,42195,2,11622,57,0,3.6306143520908623,3.228333333333333 +35,25.16514397,1,3,2,1,42195,4,14453,35,1,2.9194630872483223,4.014722222222222 +51,34.34582138,0,2,2,1,42195,3,18960,45,0,2.225474683544304,5.266666666666667 +28,23.47361565,0,2,1,1,42195,2,11010,72,1,3.832425068119891,3.058333333333333 +39,22.84348106,0,2,1,1,42195,2,11148,40,1,3.784983853606028,3.0966666666666667 +31,20.3689785,1,1,2,1,42195,3,15009,41,0,2.8113132120727564,4.1691666666666665 +54,21.2900238,0,2,1,2,42195,2,10081,100,1,4.185596666997322,2.800277777777778 +26,30.05259323,1,2,1,1,42195,2,15743,32,1,2.6802388363082006,4.373055555555555 +38,21.69139481,0,2,2,2,42195,2,9627,105,1,4.382985353692739,2.6741666666666664 +34,25.04382706,0,2,3,1,42195,4,10780,50,0,3.9141929499072354,2.9944444444444445 +31,20.55720139,1,2,1,2,42195,2,12493,35,0,3.3774913951813015,3.470277777777778 +50,22.60793114,0,1,3,1,42195,4,12052,68,1,3.5010786591437104,3.347777777777778 +45,22.62626266,0,2,3,1,42195,2,10593,65,1,3.983290852449731,2.9425000000000003 +36,22.33406639,0,2,3,1,42195,4,8777,86,1,4.807451293152558,2.4380555555555556 +27,23.20066833,0,1,2,1,42195,3,12960,60,0,3.255787037037037,3.6 +39,23.49176788,1,2,3,2,42195,2,17660,55,0,2.3892978482446208,4.905555555555555 +34,22.26345062,0,2,1,1,42195,3,10425,60,0,4.047482014388489,2.8958333333333335 +47,25.66305733,0,1,3,1,42195,3,11898,38,1,3.5463943519919314,3.305 +44,19.62345314,1,2,3,1,42195,2,9838,22,1,4.28898150030494,2.7327777777777778 +35,22.91344261,0,1,1,1,42195,2,10137,65,1,4.162474104764724,2.815833333333333 
+36,22.97629166,0,2,3,1,42195,3,15508,38,0,2.720853752901728,4.307777777777777 +53,23.00556564,0,1,3,1,42195,4,12255,50,1,3.4430844553243576,3.404166666666667 +37,20.34231186,0,1,3,1,42195,4,11352,50,0,3.7169661733615222,3.1533333333333333 +32,24.85795403,1,2,1,1,42195,4,14400,40,0,2.9302083333333333,4 +37,23.46041107,1,2,2,3,42195,4,17160,40,1,2.4589160839160837,4.766666666666667 +38,21.18406296,0,2,2,2,42195,2,12005,52,1,3.5147855060391504,3.3347222222222226 +24,21.06158638,1,2,1,3,42195,2,13530,60,1,3.1186252771618626,3.7583333333333333 +34,24.30183029,0,2,1,2,42195,3,12507,40,0,3.3737107219956823,3.4741666666666666 +35,24.85795403,1,2,1,3,42195,2,15527,40,1,2.7175243124879245,4.313055555555556 +25,23.08238602,1,2,2,1,42195,2,12048,60,0,3.502241035856574,3.3466666666666667 +30,21.69922447,1,2,2,2,42195,3,10334,85,1,4.083123669440681,2.8705555555555553 +37,28.49721718,0,2,1,1,42195,4,16474,50,0,2.5613087289061554,4.5761111111111115 +26,22.19460106,1,2,1,2,42195,3,20355,35,1,2.072955047899779,5.654166666666667 +33,21.23309135,0,3,2,1,42195,2,11440,65,1,3.688374125874126,3.1777777777777776 +37,21.69139481,0,2,3,1,42195,3,9290,108,1,4.541980624327233,2.5805555555555557 +26,19.44146538,1,2,3,1,42195,2,16226,30,0,2.6004560581782323,4.5072222222222225 +37,23.00556564,1,2,1,2,42195,2,16636,60,1,2.536366915123828,4.6211111111111105 +24,24.30183029,1,2,1,1,42195,3,16660,34,1,2.5327130852340938,4.627777777777778 +23,20.41903305,1,2,3,1,42195,3,17153,35,0,2.459919547601003,4.764722222222222 +44,21.68908119,1,2,1,1,42195,3,15060,55,1,2.801792828685259,4.183333333333334 +32,19.92143822,0,2,2,2,42195,3,10080,67,1,4.186011904761905,2.8 +41,21.38588142,0,2,3,3,42195,3,15617,48,0,2.701863354037267,4.338055555555556 +32,26.20307159,1,2,1,2,42195,3,14313,33.59999847,1,2.9480192831691467,3.9758333333333336 +61,23.74768066,0,2,1,1,42195,3,12900,68,1,3.2709302325581397,3.5833333333333335 +42,22.50635719,0,1,2,1,42195,2,11248,89,0,3.751333570412518,3.1244444444444444 
+27,23.67722511,1,2,3,2,42195,2,13143,52,1,3.2104542341931066,3.6508333333333334 +22,25.23191071,0,1,1,1,42195,3,12132,50,1,3.4779920870425323,3.3699999999999997 +28,19.44146538,1,2,1,2,42195,4,12960,43,1,3.255787037037037,3.6 +26,29.68460083,0,2,2,1,42195,2,17137,40,1,2.4622162572212174,4.760277777777778 +25,22.14966202,0,2,3,1,42195,4,9152,110,1,4.6104676573426575,2.542222222222222 +47,25.23407936,0,2,2,1,42195,3,11620,62,1,3.6312392426850257,3.2277777777777774 +50,20.24147606,1,2,1,2,42195,4,12211,69,0,3.4554909507820817,3.391944444444445 +34,21.24975014,0,1,2,1,42195,2,11047,75,1,3.8195890286955736,3.068611111111111 +29,23.67722511,0,1,2,2,42195,3,11295,55,1,3.735723771580345,3.1375 +25,24.20903206,1,1,3,1,42195,2,13620,50,1,3.0980176211453743,3.783333333333333 +49,19.57361603,1,2,3,2,42195,2,14304,25,0,2.9498741610738257,3.9733333333333336 +32,22.02581596,0,2,1,1,42195,4,11070,51,1,3.8116531165311653,3.075 +33,25.87862206,1,2,3,3,42195,4,21300,20,0,1.9809859154929577,5.916666666666667 +35,21.38588142,1,2,1,1,42195,4,14340,30,0,2.942468619246862,3.9833333333333334 +38,22.65063095,0,2,2,1,42195,2,12485,67,1,3.379655586704045,3.468055555555556 +32,21.23309135,0,2,3,2,42195,2,12720,35,0,3.3172169811320753,3.533333333333333 +42,23.61009026,0,2,2,2,42195,3,12660,61,0,3.3329383886255926,3.5166666666666666 +44,24.48979568,0,2,1,2,42195,3,12531,50,1,3.3672492219296144,3.4808333333333334 +33,27.15151405,0,1,3,1,42195,4,19825,48,0,2.1283732660781842,5.506944444444445 +47,23.97780609,0,2,2,2,42195,3,11678,80,1,3.613204315807501,3.243888888888889 +46,31.7002182,0,2,3,1,42195,4,23100,18,0,1.8266233766233766,6.416666666666667 +36,27.54168892,1,2,2,3,42195,4,20520,40,0,2.056286549707602,5.7 +25,20.58569527,0,2,2,1,42195,4,10176,75,1,4.146521226415095,2.8266666666666667 +24,23.97780609,0,2,2,1,42195,4,9214,107,1,4.579444323855003,2.5594444444444444 +40,23.04032135,0,2,1,2,42195,4,14160,40,0,2.979872881355932,3.933333333333333 
+39,21.34480858,1,2,2,2,42195,2,17640,37,0,2.3920068027210886,4.9 +34,24.5955925,1,2,3,1,42195,4,17100,30,0,2.4675438596491226,4.75 +34,23.97780609,1,2,3,1,42195,3,15958,45,0,2.644128336884321,4.432777777777777 +33,21.74523163,0,1,3,1,42195,4,11673,52,0,3.614751991775893,3.2425 +41,19.29499054,1,2,2,1,42195,4,12780,65,1,3.301643192488263,3.55 +35,21.89049911,1,2,3,1,42195,4,14169,57,1,2.977980097395723,3.9358333333333335 +59,24.23761749,0,2,1,1,42195,4,22565,38,0,1.8699313095501884,6.268055555555555 +46,21.5684433,1,2,2,1,42195,4,13100,79,1,3.2209923664122138,3.638888888888889 +58,21.74523163,0,1,2,1,42195,4,12247,100,1,3.445333551073732,3.4019444444444447 +50,23.14814949,0,2,1,2,42195,3,14433,35,0,2.923508626065267,4.009166666666667 +32,22.53944397,1,2,3,1,42195,4,12774,43,1,3.3031939877876937,3.5483333333333333 +45,19.79558945,1,2,3,1,42195,3,12844,60,0,3.2851915291186544,3.5677777777777777 +30,21.91380501,0,3,3,3,42195,2,18720,40,0,2.25400641025641,5.2 +27,23.08238602,1,2,2,3,42195,4,18514,36.5,0,2.27908609700767,5.142777777777778 +30,20.522686,1,2,1,2,42195,2,13180,35,1,3.20144157814871,3.661111111111111 +40,25.04382706,1,2,1,1,42195,2,18000,55,0,2.3441666666666667,5 +35,22.44668961,0,2,1,2,42195,2,11340,20,1,3.7208994708994707,3.15 +30,20.91937065,0,1,2,1,42195,3,9081,87,1,4.646514701024116,2.5225 +31,20.86985588,1,2,3,2,42195,4,11219,60,1,3.7610303948658528,3.116388888888889 +56,25.04382706,0,2,3,1,42195,3,17110,48,1,2.4661016949152543,4.752777777777778 +26,24.17777824,0,2,1,1,42195,2,14332,42,0,2.9441110801004746,3.9811111111111113 +47,25.96857071,0,2,1,2,42195,4,12713,60,1,3.3190434987807755,3.531388888888889 +45,24.81470108,0,2,2,2,42195,4,12212,40,0,3.4552079921388796,3.3922222222222222 +52,26.71614075,0,2,3,2,42195,2,16055,40,0,2.6281532232949236,4.459722222222222 +45,24.17159081,0,1,1,2,42195,2,15360,52,0,2.7470703125,4.266666666666667 +35,24.56541252,0,1,2,2,42195,3,10640,60,1,3.9656954887218046,2.9555555555555557 
+31,22.0385685,1,1,1,3,42195,4,12775,51,1,3.30293542074364,3.548611111111111 +33,29.68460083,0,2,2,2,42195,3,14872,22,0,2.83721086605702,4.131111111111111 +31,25.65335846,1,1,1,2,42195,3,15613,55,0,2.7025555626721323,4.336944444444444 +50,25.04382706,0,2,1,2,42195,3,11961,47,1,3.527715073990469,3.3225 +39,22.51969719,0,2,1,2,42195,4,15420,40,0,2.7363813229571985,4.283333333333333 +39,21.66193008,1,2,3,1,42195,3,14660,50,0,2.8782401091405183,4.072222222222222 +43,24.55106735,0,2,2,1,42195,2,12121,70,1,3.4811484200973517,3.3669444444444445 +37,19.24729347,1,2,2,1,42195,3,14820,100,1,2.847165991902834,4.116666666666666 +38,28.81798363,1,2,3,1,42195,2,16449,60,1,2.565201532008025,4.569166666666666 +35,20.58569527,0,2,3,1,42195,4,12480,56,1,3.3810096153846154,3.466666666666667 +34,20.73756218,1,2,3,1,42195,2,13693,50,0,3.0815014971153145,3.803611111111111 +28,21.74523163,0,2,3,2,42195,3,16513,26,0,2.555259492521044,4.586944444444444 +28,20.86985588,1,2,3,1,42195,4,15627,45,0,2.7001343827990016,4.340833333333333 +48,25.67717934,0,2,3,1,42195,2,16140,20,0,2.6143122676579926,4.483333333333333 +23,20.48237419,0,2,1,1,42195,2,10781,55,0,3.9138298859103977,2.9947222222222223 +51,25.74573708,1,2,1,1,42195,3,16578,40,0,2.5452406804198335,4.605 +33,25.92195129,0,1,3,2,42195,3,12989,38,1,3.2485179767495573,3.608055555555555 +28,22.89282227,1,2,1,1,42195,3,12664,55,1,3.3318856601389766,3.517777777777778 +39,23.00785828,0,2,1,1,42195,2,11065,70,1,3.8133755083596927,3.073611111111111 +38,25.97402573,0,2,1,1,42195,4,13201,55,1,3.1963487614574655,3.666944444444445 +42,24.6258564,0,2,1,1,42195,2,15009,57,0,2.8113132120727564,4.1691666666666665 +41,20.03506279,1,2,3,1,42195,3,13458,57,0,3.135309852875613,3.7383333333333337 +27,25.23191071,0,1,3,1,42195,2,12267,80,0,3.4397163120567376,3.4074999999999998 +42,21.87164688,1,2,3,1,42195,2,12399,65,1,3.4030970239535447,3.444166666666667 +29,24.1301918,0,2,2,1,42195,4,12179,52,0,3.4645701617538385,3.3830555555555555 
+44,21.69139481,1,2,2,1,42195,3,13609,40,1,3.1005217135719008,3.7802777777777776 +31,23.00573349,1,2,2,1,42195,2,12817,56,1,3.2921120386986034,3.560277777777778 +22,21.5213356,0,2,1,1,42195,3,11841,50,0,3.563465923486192,3.2891666666666666 +28,23.74768066,1,2,2,3,42195,4,22623,40,0,1.865137249701631,6.284166666666667 +55,26.7323513,0,2,3,2,42195,3,13924,42,0,3.030379201378914,3.8677777777777775 +28,20.24406624,1,2,1,1,42195,3,12851,60,1,3.2834020698778303,3.5697222222222225 +29,21.87164688,1,2,3,2,42195,4,17574,40,0,2.400990099009901,4.881666666666666 +28,21.62211609,1,2,3,2,42195,2,12180,50,1,3.4642857142857144,3.3833333333333333 +27,24.81470108,0,1,3,2,42195,3,14442,31,1,2.9216867469879517,4.011666666666667 +35,25.05203247,0,2,3,2,42195,4,14399,55,1,2.9304118341551497,3.999722222222222 +40,22.176784519999998,0,2,3,1,42195,3,9600,87,1,4.3953125,2.6666666666666665 +33,24.38188744,0,2,3,1,42195,3,12900,45,1,3.2709302325581397,3.5833333333333335 +32,22.51829529,0,2,1,1,42195,4,10139,100,0,4.16165302298057,2.8163888888888886 +39,19.66026878,1,2,2,1,42195,4,14940,58,0,2.82429718875502,4.15 +45,23.84960938,0,2,1,1,42195,3,12510,40,0,3.3729016786570742,3.475 +27,22.3776226,1,2,3,1,42195,3,13496,60,1,3.1264819205690575,3.748888888888889 +41,25.79427338,1,2,2,1,42195,3,14060,55,0,3.0010668563300142,3.905555555555556 +30,21.91380501,0,2,1,1,42195,3,12029,54,0,3.5077728822013468,3.3413888888888885 +28,23.94907188,0,2,2,2,42195,3,12289,34,1,3.433558466921637,3.413611111111111 +46,25.85858536,0,2,3,2,42195,2,16638,37,0,2.5360620266858995,4.621666666666667 +43,26.69023323,0,2,3,3,42195,4,20100,35,0,2.0992537313432837,5.583333333333333 +39,23.27121544,1,2,1,1,42195,2,19920,40,1,2.118222891566265,5.533333333333333 +24,20.86985588,1,2,2,2,42195,4,14400,40,0,2.9302083333333333,4 +31,20.15620995,1,1,1,3,42195,3,11205,42,0,3.7657295850066936,3.1125 +30,23.37472534,0,2,2,3,42195,4,14136,45,1,2.984932088285229,3.9266666666666667 
+41,28.44444466,0,2,3,1,42195,4,14100,50,0,2.9925531914893617,3.9166666666666665 +42,23.00573349,1,2,3,1,42195,3,14820,48,0,2.847165991902834,4.116666666666666 +42,21.23312569,0,2,2,1,42195,3,10850,45,0,3.8889400921658988,3.013888888888889 +40,25.11189079,0,2,1,3,42195,4,16080,55,1,2.6240671641791047,4.466666666666667 +47,24.70283508,0,2,1,3,42195,3,12720,70,1,3.3172169811320753,3.533333333333333 +30,22.01950264,1,2,3,2,42195,3,18585,30,1,2.2703793381759483,5.1625 +30,23.74768066,0,1,3,1,42195,4,9278,112,1,4.547855141194223,2.5772222222222223 +33,25.11223602,0,3,1,1,42195,2,12555,55,1,3.3608124253285543,3.4875 +28,26.7323513,0,2,1,1,42195,4,12536,48,0,3.365906190172304,3.482222222222222 +30,24.41077614,0,2,2,2,42195,2,10517,63,1,4.01207568698298,2.921388888888889 +41,22.80591202,0,1,3,1,42195,3,10065,55,1,4.192250372578242,2.7958333333333334 +28,20.77414703,1,3,1,1,42195,4,11190,91,1,3.7707774798927614,3.1083333333333334 +30,19.17613602,1,1,1,3,42195,2,13289,55,1,3.1751824817518246,3.6913888888888886 +44,24.44100571,0,2,1,1,42195,4,9518,108,1,4.433179239335995,2.6438888888888887 +41,19.47665596,0,1,2,2,42195,2,10579,61,0,3.9885622459589753,2.938611111111111 +37,24.48979568,0,2,1,1,42195,3,13027,60,1,3.2390419897136717,3.6186111111111114 +28,22.68170738,0,1,2,1,42195,4,9615,88,1,4.388455538221529,2.6708333333333334 +51,25.21078491,0,2,3,2,42195,3,14247,50,1,2.961676142345757,3.9575 +32,25.24751663,0,2,1,2,42195,3,11593,50,1,3.6396963684982317,3.220277777777778 +41,20.73756218,0,2,1,2,42195,3,9807,95,0,4.302539002753136,2.7241666666666666 +31,22.60793114,0,2,1,1,42195,4,11426,60,1,3.6928934010152283,3.173888888888889 +41,27.45825768,0,2,1,2,42195,2,13400,22,1,3.148880597014925,3.7222222222222223 +40,20.62209892,1,2,2,1,42195,2,12549,105,1,3.3624193162801816,3.4858333333333333 +31,47.18464661,1,2,2,1,42195,3,22680,55,0,1.8604497354497354,6.3 +28,20.20103264,0,2,2,1,42195,4,9483,92,1,4.4495412844036695,2.6341666666666668 
+37,24.04452515,0,2,3,1,42195,2,10480,60,0,4.026240458015267,2.911111111111111 +35,23.66053009,0,2,3,2,42195,2,10467,65,0,4.031241043278876,2.9074999999999998 +32,19.34570503,0,1,1,1,42195,4,8152,110,1,5.176030421982335,2.2644444444444445 +30,23.74768066,0,2,3,2,42195,3,10136,90,1,4.162884767166535,2.8155555555555556 +49,19.79558945,1,2,2,1,42195,2,12858,47,1,3.281614559029398,3.5716666666666668 +44,22.32523155,1,1,2,1,42195,2,16973,43,1,2.4860071878866434,4.714722222222222 +54,23.00556564,0,2,3,1,42195,3,11580,70,0,3.643782383419689,3.216666666666667 +30,20.47117615,0,2,2,2,42195,4,8815,90,1,4.7867271695972775,2.448611111111111 +52,22.14966202,0,2,2,1,42195,2,11541,56,1,3.6560956589550297,3.205833333333333 +28,22.26345062,0,1,2,2,42195,2,12150,65,1,3.4728395061728397,3.375 +31,24.52615929,1,2,1,1,42195,4,13879,55,0,3.0402046256934936,3.855277777777778 +45,21.0636425,0,2,2,1,42195,2,11807,90,1,3.5737274498179046,3.279722222222222 +46,20.65626717,1,2,2,1,42195,2,12600,60,1,3.348809523809524,3.5 +29,23.08238602,1,2,2,1,42195,3,13162,60,1,3.2058197842273213,3.656111111111111 +39,23.80480194,0,2,1,1,42195,2,11606,50,0,3.6356195071514734,3.223888888888889 +44,24.56541252,0,2,1,1,42195,2,9300,65,1,4.537096774193548,2.5833333333333335 +46,34.39236832,1,2,3,1,42195,4,12423,62,1,3.396522579087177,3.4508333333333336 +33,25.52073479,0,2,1,3,42195,2,20714,15,0,2.037028096939268,5.7538888888888895 +30,23.37472534,0,2,1,1,42195,4,11332,59,0,3.7235262972114365,3.147777777777778 +37,21.83592224,0,2,1,1,42195,4,14640,45,1,2.882172131147541,4.066666666666666 +31,32.5569725,1,2,1,1,42195,2,18600,32,0,2.268548387096774,5.166666666666667 +32,26.11717224,0,1,3,2,42195,4,8950,36,0,4.714525139664804,2.486111111111111 +34,25.74573708,1,2,3,2,42195,2,18005,45,0,2.3435156900860874,5.001388888888888 +38,27.12304497,0,2,3,1,42195,4,17878,42,0,2.3601633292314577,4.96611111111111 +20,21.23309135,0,1,3,3,42195,2,12910,35,0,3.2683965917893105,3.586111111111111 
+23,19.17613602,1,2,2,3,42195,3,15276,45,0,2.762175962293794,4.243333333333333 +25,21.04377174,0,2,2,1,42195,2,11703,51,1,3.605485772878749,3.2508333333333335 +47,24.09060478,0,1,2,1,42195,3,13170,70,0,3.203872437357631,3.658333333333333 +30,27.54821014,0,2,1,1,42195,2,11505,72,1,3.667535853976532,3.1958333333333333 +28,21.17267418,1,1,2,2,42195,2,18600,30,0,2.268548387096774,5.166666666666667 +43,30.55125809,0,2,1,1,42195,2,15120,40,1,2.7906746031746033,4.2 +30,20.41903305,1,2,2,1,42195,2,14760,40,1,2.858739837398374,4.1 +33,24.13549423,0,2,2,1,42195,2,12720,35,1,3.3172169811320753,3.533333333333333 +37,23.84960938,0,2,3,1,42195,3,10980,60,1,3.842896174863388,3.05 +27,21.83592224,0,2,2,1,42195,2,13920,60,1,3.03125,3.8666666666666667 +45,27.22624207,0,2,3,2,42195,2,15600,30,0,2.7048076923076922,4.333333333333333 +32,18.19851494,1,1,3,2,42195,4,12530,60,1,3.367517956903432,3.4805555555555556 +24,21.06158638,1,2,3,1,42195,2,10340,75,1,4.080754352030948,2.8722222222222222 +37,25.96857071,0,2,2,1,42195,4,13860,55,1,3.044372294372294,3.85 +47,21.96660423,0,1,1,1,42195,3,13162,55,0,3.2058197842273213,3.656111111111111 +58,19.20026779,1,2,3,1,42195,3,17421,41,0,2.4220768038574136,4.839166666666667 +39,23.67722511,0,2,1,1,42195,3,12813,40,1,3.293139779911028,3.559166666666667 +50,24.13549423,0,2,3,3,42195,4,17040,46,0,2.476232394366197,4.733333333333333 +44,22.20382309,1,2,2,1,42195,2,16209,50,1,2.603183416620396,4.5024999999999995 +30,22.01950264,0,1,1,1,42195,4,14115,40,0,2.989373007438895,3.9208333333333334 +42,24.34137344,0,2,3,2,42195,4,13200,40,1,3.1965909090909093,3.6666666666666665 +46,21.22448921,0,1,3,2,42195,2,10176,55,1,4.146521226415095,2.8266666666666667 +26,23.27775955,0,2,3,1,42195,2,9904,76,1,4.2603998384491115,2.751111111111111 +34,21.38588142,0,2,3,2,42195,3,13480,50,0,3.130192878338279,3.7444444444444445 +31,21.24975014,0,1,1,3,42195,2,11175,30,0,3.7758389261744965,3.1041666666666665 
+51,25.96857071,0,2,2,2,42195,2,13617,53,0,3.098700154218991,3.7824999999999998 +32,29.38775444,0,2,1,1,42195,4,15077,45,0,2.798633680440406,4.188055555555556 +37,36.75645447,0,2,2,1,42195,3,21046,22,0,2.0048940416231114,5.846111111111111 +31,23.08344269,1,1,3,1,42195,3,15070,40,0,2.7999336429993362,4.186111111111111 +54,21.38588142,0,1,1,1,42195,4,18900,32,0,2.2325396825396826,5.25 +50,26.7323513,0,2,3,2,42195,4,13444,65,1,3.1385748289199644,3.7344444444444442 +33,21.06158638,1,2,2,1,42195,3,12481,50,1,3.3807387228587453,3.4669444444444446 +45,25.81369209,0,2,3,1,42195,4,12749,63,1,3.309671346772296,3.5413888888888887 +45,23.45083618,0,1,3,1,42195,4,10963,65,1,3.8488552403539176,3.0452777777777778 +34,26.42340279,0,2,3,2,42195,3,11790,68,1,3.578880407124682,3.275 +56,22.41187286,0,2,2,1,42195,3,10906,80,1,3.8689712085090777,3.0294444444444446 +37,25.92195129,1,2,2,3,42195,3,17281,40,0,2.441698975753718,4.800277777777778 +40,28.13294601,0,2,3,1,42195,3,14355,70,1,2.9393939393939394,3.9875 +64,21.2037735,1,2,2,1,42195,2,15000,17,0,2.813,4.166666666666667 +49,26.61252975,0,2,1,1,42195,2,15587,51,1,2.707063578623212,4.329722222222222 +24,21.51694489,1,2,2,1,42195,3,12830,50,0,3.2887763055339048,3.563888888888889 +55,26.6554451,0,1,3,3,42195,2,16410,41,0,2.5712979890310788,4.558333333333334 +35,31.56565857,0,2,1,3,42195,2,16500,65,1,2.557272727272727,4.583333333333333 +37,25.68181801,0,2,2,3,42195,4,20100,35,0,2.0992537313432837,5.583333333333333 +32,23.67722511,0,1,2,1,42195,4,13162,46,1,3.2058197842273213,3.656111111111111 +30,22.68170738,1,2,1,1,42195,3,12753,51,1,3.308633262761703,3.5425 +40,24.93506622,0,2,2,1,42195,3,13405,65,0,3.147706079820962,3.723611111111111 +33,26.37486076,0,2,3,1,42195,2,14822,45,0,2.8467818108217515,4.117222222222222 +26,25.85858536,0,1,2,2,42195,2,16506,45,1,2.5563431479462015,4.585 +49,22.68170738,1,2,1,2,42195,3,18420,33,0,2.29071661237785,5.116666666666666 +34,21.17267418,0,1,2,2,42195,3,10530,72,1,4.007122507122507,2.925 
+47,21.39037323,1,2,3,1,42195,2,15000,40,1,2.813,4.166666666666667 +27,27.52437782,0,1,1,2,42195,4,12426,45,0,3.395702559150169,3.4516666666666667 +35,25.24751663,1,2,1,3,42195,3,13079,52,1,3.2261640798226163,3.6330555555555555 +46,23.84960938,0,2,2,1,42195,2,11913,50,0,3.5419289851422815,3.309166666666667 +55,24.0420742,0,2,2,1,42195,3,12741,70,1,3.3117494702142687,3.5391666666666666 +38,24.85058212,1,2,2,2,42195,4,13210,65,0,3.1941710825132477,3.6694444444444443 +60,32.40740967,0,2,1,1,42195,3,18730,50,0,2.252802989855846,5.202777777777778 +36,23.45410728,1,1,1,2,42195,3,17077,30,0,2.470867248345728,4.743611111111111 +53,21.70465088,1,2,2,1,42195,3,16500,50,1,2.557272727272727,4.583333333333333 +32,24.64646339,1,2,3,1,42195,4,17149,40,0,2.460493323225844,4.7636111111111115 +32,23.82998085,1,2,3,3,42195,3,14379,28,1,2.934487794700605,3.9941666666666666 +51,26.56771851,0,2,3,1,42195,3,14745,43,0,2.861648016276704,4.095833333333333 +41,24.54295158,0,1,1,1,42195,3,9960,100,1,4.23644578313253,2.7666666666666666 +49,20.89957428,1,2,3,2,42195,3,12363,67,1,3.4130065518078134,3.434166666666667 +55,24.37873268,0,2,1,2,42195,3,12180,33,0,3.4642857142857144,3.3833333333333333 +34,24.23761749,0,2,2,2,42195,4,15610,44,0,2.7030749519538757,4.336111111111111 +47,21.38588142,0,2,2,2,42195,3,9827,60,1,4.293782436145314,2.729722222222222 +46,24.69135857,0,2,1,3,42195,3,17820,40,0,2.3678451178451176,4.95 +26,23.37423897,0,2,2,1,42195,4,14820,30,0,2.847165991902834,4.116666666666666 +43,23.06619835,0,1,3,1,42195,3,13105,73,0,3.2197634490652423,3.6402777777777775 +26,21.96660423,0,2,3,1,42195,3,11160,40,1,3.7809139784946235,3.1 +28,23.45410728,1,2,2,1,42195,4,13396,50,0,3.1498208420424008,3.7211111111111115 +48,20.25152397,1,2,1,1,42195,3,15840,45,1,2.663825757575758,4.4 +53,22.68170738,0,2,2,1,42195,3,11507,64,0,3.666898409663683,3.196388888888889 +45,23.2189579,0,2,2,1,42195,4,11470,50,0,3.678727114210985,3.186111111111111 
+40,23.61009026,0,2,2,1,42195,4,10627,85,1,3.9705467206172957,2.9519444444444445 +59,23.24191475,0,2,2,1,42195,3,14785,56,1,2.8539059857964153,4.106944444444444 +24,21.98859215,1,1,3,1,42195,4,15284,20,0,2.760730175346768,4.245555555555556 +46,25.05050468,1,2,2,1,42195,4,13720,55,1,3.0754373177842567,3.811111111111111 +55,21.30681801,1,1,1,2,42195,2,13506,54,1,3.124167036872501,3.7516666666666665 +33,22.68170738,1,2,3,3,42195,2,19307,32,1,2.1854767700833895,5.363055555555556 +31,20.03506279,0,2,1,1,42195,3,9414,110,1,4.482154238368388,2.615 +40,27.15151405,0,2,2,1,42195,2,17623,21,1,2.3943142484253532,4.895277777777777 +36,23.67722511,0,1,3,1,42195,3,14712,20.60000038,1,2.8680668841761827,4.086666666666667 +38,20.63082695,0,2,2,1,42195,3,13142,52,1,3.2106985238167707,3.6505555555555556 +41,23.79261398,0,2,2,1,42195,4,9801,80,1,4.305172941536578,2.7224999999999997 +47,23.49176788,0,2,3,1,42195,4,10588,55,0,3.9851718927087267,2.9411111111111112 +49,26.27408218,0,2,2,1,42195,4,12516,44,0,3.3712847555129435,3.4766666666666666 +27,20.65626717,1,2,3,1,42195,3,12545,40,0,3.363491430848944,3.4847222222222225 +45,25.96857071,0,2,3,1,42195,3,13973,30,0,3.0197523795892076,3.8813888888888886 +36,20.03506279,1,2,3,2,42195,2,15722,60,1,2.683818852563287,4.367222222222223 +39,24.44100571,0,1,1,2,42195,3,13565,33,1,3.110578695171397,3.7680555555555557 +27,22.51829529,0,2,1,2,42195,4,10846,61,1,3.890374331550802,3.012777777777778 +47,25.92195129,0,2,3,1,42195,2,14045,65,0,3.0042719829120683,3.901388888888889 +33,22.86756706,0,2,3,1,42195,2,15294,43,1,2.7589250686543743,4.248333333333333 +31,26.93602562,1,1,3,1,42195,3,14109,45,0,2.990644269615139,3.919166666666667 +46,21.04377174,0,2,2,1,42195,2,11492,60,1,3.6716846501914375,3.192222222222222 +42,29.09090996,0,2,1,1,42195,2,12613,25,0,3.3453579640053914,3.5036111111111112 +49,24.48979568,0,2,2,1,42195,3,13080,100,0,3.2259174311926606,3.6333333333333333 +31,22.26345062,0,2,3,2,42195,3,11090,70,1,3.8047790802524797,3.0805555555555557 
+40,25.24751663,0,2,3,3,42195,2,15600,60,1,2.7048076923076922,4.333333333333333 +48,25.81369209,0,2,2,2,42195,3,13835,56,1,3.049873509215757,3.843055555555556 +30,21.38588142,1,2,2,1,42195,4,17970,25,0,2.3480801335559267,4.991666666666666 +67,28.05836296,0,2,3,1,42195,2,21600,40,0,1.9534722222222223,6 +33,21.640728,1,1,1,1,42195,2,14043,35,1,3.0046998504593034,3.9008333333333334 +35,20.31658554,0,2,3,3,42195,3,10424,63,1,4.047870299309286,2.895555555555555 +42,24.04452515,0,2,1,1,42195,3,13680,100,0,3.0844298245614037,3.8 +51,32.36148453,1,2,1,2,42195,3,22333,42,0,1.8893565575605606,6.203611111111111 +37,23.88304138,0,1,1,1,42195,3,11987,40,1,3.5200634020188537,3.3297222222222222 +33,18.32382584,1,2,3,1,42195,4,11966,50,0,3.5262410162126026,3.323888888888889 +25,21.5213356,0,1,3,1,42195,2,12300,70,1,3.430487804878049,3.4166666666666665 +54,21.87164688,1,2,2,1,42195,4,14306,51,1,2.9494617642947016,3.973888888888889 +34,25.74573708,0,2,1,1,42195,4,13185,45,0,3.200227531285552,3.6625 +39,22.36208534,0,1,1,1,42195,3,10922,54,0,3.863303424281267,3.033888888888889 +32,22.72727203,1,2,3,1,42195,4,14700,55,1,2.870408163265306,4.083333333333333 +45,21.97979736,0,2,3,2,42195,4,11017,70,0,3.829990015430698,3.060277777777778 +34,23.5923233,1,2,2,2,42195,3,16151,30,0,2.612531731781314,4.4863888888888885 +37,24.1301918,0,2,1,1,42195,3,14096,30,0,2.9934023836549377,3.9155555555555557 +49,19.53418541,1,1,3,1,42195,2,13767,76,1,3.0649378949662234,3.8241666666666663 +70,24.30183029,0,2,2,1,42195,2,17706,42,1,2.383090477804134,4.918333333333334 +38,22.53944397,1,2,2,1,42195,3,15480,30,0,2.72577519379845,4.3 +32,23.14814949,0,2,1,1,42195,3,12540,45,0,3.3648325358851676,3.4833333333333334 +36,25.25252724,0,1,3,1,42195,2,15060,28,0,2.801792828685259,4.183333333333334 +38,22.91344261,0,2,1,1,42195,4,14040,37,1,3.0053418803418803,3.9 +33,22.72154427,1,2,3,1,42195,3,16877,35,0,2.50014813059193,4.688055555555556 
+35,18.84972191,1,2,2,1,42195,3,11002,75,1,3.8352117796764227,3.0561111111111114 +45,27.5420742,0,2,2,1,42195,2,15720,50,1,2.6841603053435112,4.366666666666666 +39,22.36208534,0,2,2,2,42195,4,11708,20,1,3.6039460198155107,3.252222222222222 +33,23.49176788,1,2,3,2,42195,2,11580,50,1,3.643782383419689,3.216666666666667 +35,22.70362473,1,1,3,1,42195,3,15028,50,1,2.8077588501463935,4.174444444444444 +41,24.27048302,0,2,3,1,42195,4,11604,80,1,3.636246122026887,3.2233333333333336 +47,23.20066833,0,2,2,2,42195,3,12840,3,1,3.286214953271028,3.566666666666667 +43,20.86985588,1,1,2,2,42195,2,17927,38.5,1,2.353712277570146,4.979722222222223 +43,22.80591202,0,2,2,1,42195,4,11510,55,1,3.6659426585577757,3.1972222222222224 +44,31.16883087,0,2,3,1,42195,3,14942,45,1,2.8239191540623745,4.150555555555555 +35,22.01950264,1,2,1,2,42195,4,13090,38,1,3.2234530175706646,3.636111111111111 +38,19.24001694,1,2,1,1,42195,4,14107,55,1,2.99106826398242,3.9186111111111113 +31,23.00556564,0,2,2,2,42195,2,14314,55,1,2.9478133296073774,3.976111111111111 +23,21.51694489,1,2,3,1,42195,4,10991,73,1,3.8390501319261214,3.0530555555555554 +35,19.64085388,0,2,2,1,42195,3,13111,52,1,3.2182899855083518,3.641944444444445 +34,20.89263725,1,1,2,1,42195,4,11880,45,0,3.551767676767677,3.3 +32,21.87164688,1,2,2,2,42195,2,12700,70,1,3.32244094488189,3.5277777777777777 +46,25.21212006,0,2,2,2,42195,2,13320,42,1,3.1677927927927927,3.7 +27,21.06158638,0,2,3,3,42195,4,10905,55,0,3.869325997248968,3.029166666666667 +24,21.74523163,0,2,2,3,42195,2,11520,45,1,3.6627604166666665,3.2 +43,20.63079834,0,1,1,1,42195,3,11363,48,1,3.713367948605122,3.156388888888889 +34,23.67722511,1,2,2,1,42195,3,19516,40,1,2.1620721459315435,5.421111111111111 +30,26.71614075,0,2,3,1,42195,4,11620,30,0,3.6312392426850257,3.2277777777777774 +38,23.5923233,0,2,3,2,42195,3,12120,55,0,3.4814356435643563,3.3666666666666667 +51,23.84960938,0,1,3,1,42195,4,12648,58,1,3.336100569259962,3.5133333333333336 
+46,23.74768066,0,1,2,2,42195,4,12930,80,1,3.2633410672853826,3.591666666666667 +32,22.80591202,1,2,1,2,42195,3,15473,62,1,2.7270083371033413,4.298055555555555 +54,29.26304436,1,2,2,1,42195,2,15840,54,0,2.663825757575758,4.4 +24,19.47665596,0,2,2,1,42195,3,12432,45,1,3.3940637065637067,3.453333333333333 +69,23.06619835,0,2,1,2,42195,2,14460,45,1,2.91804979253112,4.016666666666667 +40,22.35768318,1,2,1,2,42195,3,15922,55,1,2.650106770506218,4.422777777777778 +52,25.32242584,0,2,2,1,42195,2,14275,43,0,2.955866900175131,3.9652777777777777 +36,20.91937065,0,2,3,1,42195,3,9398,90,1,4.489785060651203,2.6105555555555555 +48,25.24778557,0,2,2,1,42195,4,12059,55,1,3.499046355419189,3.349722222222222 +45,24.55106735,0,2,2,1,42195,2,13392,42,0,3.1507616487455197,3.7199999999999998 +28,20.86985588,1,1,1,1,42195,3,14768,40,0,2.857191224268689,4.102222222222222 +49,22.3776226,1,1,1,1,42195,2,12275,74,1,3.4374745417515276,3.4097222222222223 +40,20.03710556,0,1,1,1,42195,3,10600,80,0,3.9806603773584905,2.944444444444444 +56,23.52446938,0,1,3,1,42195,3,12663,65,1,3.332148779909974,3.5175 +25,21.66193008,1,2,3,1,42195,2,12247,50,0,3.445333551073732,3.4019444444444447 +35,20.65626717,1,2,2,3,42195,3,14465,40,0,2.9170411337711717,4.018055555555556 +39,22.35768318,0,2,3,2,42195,2,11334,55,1,3.722869242985707,3.1483333333333334 +38,23.14814949,0,2,1,2,42195,3,11780,50,1,3.581918505942275,3.2722222222222226 +29,20.13986015,1,2,1,2,42195,3,13413,45,1,3.145828673674793,3.7258333333333336 +42,25.97402573,0,2,3,3,42195,3,16500,35,0,2.557272727272727,4.583333333333333 +44,22.33406639,0,2,2,2,42195,2,10685,70,1,3.9489939167056622,2.968055555555556 +37,22.56029701,0,1,2,1,42195,3,13502,45,1,3.1250925788772035,3.7505555555555556 +29,24.48979568,0,2,2,1,42195,3,11011,55,1,3.8320770138951956,3.0586111111111114 +44,24.20903206,0,1,2,1,42195,4,10913,43,1,3.8664895079263264,3.031388888888889 +33,21.51694489,1,1,3,2,42195,4,12510,55,1,3.3729016786570742,3.475 
+27,19.44146538,0,2,1,2,42195,3,9540,90,1,4.422955974842767,2.65 +35,20.73756218,1,2,3,1,42195,2,12496,55,1,3.376680537772087,3.4711111111111115 +43,25.81369209,0,2,1,1,42195,3,13989,40,1,3.016298520265923,3.8858333333333333 +40,17.84652138,0,2,3,1,42195,4,12518,60,1,3.370746125579166,3.477222222222222 +38,25.24751663,0,2,3,1,42195,3,12510,55,1,3.3729016786570742,3.475 +28,22.68170738,1,2,3,1,42195,2,12510,65,1,3.3729016786570742,3.475 +37,22.80591202,0,2,3,1,42195,3,10920,50,1,3.864010989010989,3.033333333333333 +36,21.51694489,1,2,2,1,42195,3,11400,35,0,3.7013157894736843,3.1666666666666665 +37,27.20981216,0,1,3,2,42195,2,12424,50,0,3.396249195106246,3.451111111111111 +44,22.57785988,0,2,3,1,42195,2,12823,50,1,3.2905716291039537,3.5619444444444444 +43,22.60793114,0,2,3,1,42195,2,16160,20,0,2.611076732673267,4.488888888888889 +35,19.27918243,1,1,3,1,42195,3,11940,65,1,3.5339195979899496,3.316666666666667 +22,21.87164688,1,2,1,1,42195,4,12065,62,1,3.4973062577704104,3.351388888888889 +53,23.37171364,0,2,1,1,42195,2,12430,75,1,3.3946098149637973,3.4527777777777775 +52,21.03681564,1,2,2,2,42195,4,12578,57,1,3.354666878677055,3.493888888888889 +36,22.14966202,1,1,3,3,42195,4,14040,45,0,3.0053418803418803,3.9 +44,23.49176788,0,2,3,2,42195,2,11524,21,0,3.661489066296425,3.201111111111111 +40,22.86775398,1,2,2,1,42195,2,16440,36,1,2.5666058394160585,4.566666666666666 +30,21.37924576,1,1,3,1,42195,2,12180,63,1,3.4642857142857144,3.3833333333333333 +45,21.38588142,0,1,2,1,42195,3,10875,82,1,3.88,3.0208333333333335 +34,22.51829529,0,2,2,1,42195,2,9533,85,1,4.426203713416553,2.6480555555555556 +29,21.38588142,0,1,1,3,42195,2,9877,90,1,4.272046167864736,2.7436111111111114 +39,21.37291145,0,2,1,2,42195,3,9601,130,1,4.3948547026351426,2.666944444444445 +46,20.41903305,1,2,3,1,42195,4,12837,84,1,3.286982939939238,3.565833333333333 +34,21.87164688,1,1,1,2,42195,3,12255,46,0,3.4430844553243576,3.404166666666667 
+34,25.66305733,0,2,1,2,42195,3,14820,35,0,2.847165991902834,4.116666666666666 +38,27.41159058,0,2,2,3,42195,4,18026,18,0,2.34078553200932,5.0072222222222225 +32,24.5955925,1,1,3,1,42195,2,12835,58,1,3.287495130502532,3.5652777777777778 +32,25.8121376,0,2,2,2,42195,2,14459,43,0,2.9182516079950203,4.016388888888889 +31,25.75936508,0,2,2,1,42195,4,13025,50,1,3.239539347408829,3.618055555555556 +40,22.26345062,0,1,3,1,42195,3,8657,125,1,4.874090331523623,2.404722222222222 +40,25.20478821,0,1,3,1,42195,3,13585,40,1,3.1059992638940006,3.773611111111111 +32,23.49176788,0,2,3,1,42195,3,13087,50,1,3.2241919462061586,3.635277777777778 +44,22.79490089,0,2,2,1,42195,3,13456,70,1,3.1357758620689653,3.737777777777778 +44,23.5923233,0,2,1,1,42195,4,11985,45,0,3.520650813516896,3.3291666666666666 +28,24.97483635,0,2,2,1,42195,3,11200,67,0,3.7674107142857145,3.111111111111111 +31,23.84960938,0,2,2,3,42195,4,14160,50,0,2.979872881355932,3.933333333333333 +37,21.07580948,1,2,3,2,42195,3,13812,48,1,3.054952215464813,3.8366666666666664 +26,24.73716545,1,1,1,2,42195,2,14668,43,0,2.8766703026997544,4.0744444444444445 +56,25.47970772,0,1,2,2,42195,3,14650,40,0,2.8802047781569966,4.069444444444445 +28,21.70963478,1,2,1,1,42195,2,12153,40,1,3.4719822266107134,3.3758333333333335 +26,23.2189579,0,2,2,1,42195,2,10087,105,1,4.183106969366511,2.8019444444444446 +29,22.0333519,1,2,1,1,42195,2,12352,46,1,3.4160459844559585,3.4311111111111114 +29,22.03732109,0,2,2,1,42195,3,9392,40,1,4.4926533219761495,2.608888888888889 +36,23.00573349,0,2,2,1,42195,2,10473,75,0,4.028931538241191,2.9091666666666667 +32,24.96845627,0,2,3,2,42195,2,13131,50,1,3.213388165410098,3.6475 +34,23.16774559,1,2,1,1,42195,4,14268,44,0,2.957317073170732,3.9633333333333334 +25,21.91380501,0,2,1,1,42195,3,10021,55,1,4.2106576190000995,2.7836111111111115 +48,23.74768066,0,2,3,3,42195,2,15840,26,0,2.663825757575758,4.4 +35,24.04452515,0,2,3,1,42195,3,10861,74,0,3.8850013810882977,3.016944444444445 
+49,20.522686,1,2,2,1,42195,4,13020,65,0,3.2407834101382487,3.6166666666666667 +49,23.27272797,0,3,1,1,42195,3,15873,20,1,2.6582876582876582,4.409166666666667 +29,24.22145271,0,2,1,3,42195,4,12225,60,1,3.4515337423312884,3.3958333333333335 +38,22.26345062,0,1,3,2,42195,3,11220,70,1,3.7606951871657754,3.1166666666666667 +43,24.57002449,0,2,3,1,42195,3,11762,90,1,3.5874001020234654,3.2672222222222222 +42,22.16610718,0,2,3,2,42195,2,10522,65,1,4.010169169359437,2.9227777777777777 +28,28.40193176,1,2,2,1,42195,3,16800,40,0,2.5116071428571427,4.666666666666667 +26,20.03710556,0,1,1,1,42195,3,10822,65,0,3.8990020328959525,3.006111111111111 +27,23.84337807,1,1,2,2,42195,2,11145,83,1,3.7860026917900402,3.095833333333333 +25,20.47117615,0,2,3,2,42195,3,10560,105,1,3.9957386363636362,2.933333333333333 +31,21.03681564,1,2,1,1,42195,2,12500,51,0,3.3756,3.4722222222222223 +32,22.91344261,0,2,1,1,42195,2,10349,64,1,4.077205527104068,2.874722222222222 +35,26.69023323,0,2,3,1,42195,3,15000,50,1,2.813,4.166666666666667 +31,21.07240105,1,2,3,1,42195,4,12500,60,1,3.3756,3.4722222222222223 +38,28.11374855,0,2,1,1,42195,3,16280,55,0,2.591830466830467,4.522222222222222 +46,27.03786278,0,2,2,1,42195,3,14378,40,1,2.934691890388093,3.993888888888889 +28,22.3776226,1,2,3,1,42195,4,11835,55,1,3.565272496831432,3.2875 +66,20.76318932,0,2,1,1,42195,3,18930,60,1,2.229001584786054,5.258333333333334 +44,23.42873192,0,2,2,1,42195,4,15310,49,0,2.756041802743305,4.252777777777777 +24,20.19801331,0,2,1,1,42195,4,9130,86,1,4.62157721796276,2.536111111111111 +30,23.97016907,1,2,2,3,42195,3,17827,25,0,2.36691535311606,4.951944444444445 +28,21.640728,0,2,2,2,42195,4,8242,130,1,5.11950982771172,2.2894444444444444 +30,23.5923233,1,2,1,1,42195,4,15496,70,1,2.7229607640681466,4.304444444444444 +33,23.93687057,0,2,1,2,42195,4,8797,104,1,4.79652154143458,2.443611111111111 +31,23.24191475,0,2,3,1,42195,2,14460,50,1,2.91804979253112,4.016666666666667 
+28,24.85058212,0,1,2,1,42195,3,14161,50,1,2.979662453216581,3.9336111111111114 +36,23.5923233,0,1,1,2,42195,4,10050,70,1,4.1985074626865675,2.7916666666666665 +55,22.56029701,0,2,1,1,42195,2,11940,60,1,3.5339195979899496,3.316666666666667 +38,23.90596962,0,2,1,3,42195,2,13468,55,0,3.132981882981883,3.741111111111111 +49,23.84960938,0,2,3,1,42195,3,11998,44,1,3.5168361393565593,3.332777777777778 +29,25.74573708,1,2,2,1,42195,3,20430,33,0,2.065345080763583,5.675 +35,22.72189331,1,2,3,2,42195,2,13072,70,1,3.227891676866585,3.631111111111111 +40,24.30183029,1,2,3,2,42195,4,14750,55,1,2.860677966101695,4.097222222222222 +23,17.7299614,0,2,2,2,42195,3,9780,50,0,4.314417177914111,2.716666666666667 +36,22.0385685,0,2,1,1,42195,2,11395,97,1,3.702939885914875,3.1652777777777774 +36,23.03030205,1,1,1,1,42195,2,12291,54,1,3.432999755918965,3.4141666666666666 +49,23.3258419,0,1,3,1,42195,4,10701,62,0,3.9430894308943087,2.9724999999999997 +43,24.3044014,0,2,2,1,42195,3,11593,65,1,3.6396963684982317,3.220277777777778 +36,32.05936813,0,1,1,3,42195,3,15600,45,1,2.7048076923076922,4.333333333333333 +34,23.2491684,0,1,1,2,42195,3,13320,50,0,3.1677927927927927,3.7 +30,22.57785988,0,1,2,1,42195,3,11040,100,1,3.8220108695652173,3.066666666666667 +35,22.01950264,1,1,1,1,42195,3,12546,55,1,3.363223338115734,3.485 +28,23.08344269,0,1,3,1,42195,3,13320,46,0,3.1677927927927927,3.7 +30,20.03506279,1,2,1,3,42195,2,12996,45,0,3.246768236380425,3.61 +39,21.38588142,0,1,1,2,42195,4,11462,70,1,3.6812947129645788,3.1838888888888888 +31,22.68170738,1,2,1,1,42195,2,12204,60,0,3.457472959685349,3.39 +46,19.64085388,0,1,1,1,42195,3,9838,70,1,4.28898150030494,2.7327777777777778 +48,20.20202065,0,1,3,1,42195,3,10020,70,1,4.211077844311378,2.783333333333333 +48,20.76124382,1,2,1,1,42195,2,12072,55,1,3.4952783300198806,3.353333333333333 +32,25.89813232,0,2,2,1,42195,3,12075,57,1,3.4944099378881988,3.3541666666666665 +45,26.6554451,0,2,2,3,42195,2,15600,29,0,2.7048076923076922,4.333333333333333 
+33,20.92764473,0,2,3,3,42195,3,10719,80,0,3.936467954100196,2.9775 +35,19.67647552,1,2,2,3,42195,2,11160,20,1,3.7809139784946235,3.1 +34,19.44146538,0,2,1,1,42195,3,10485,44,1,4.024320457796852,2.9125 +54,22.33406639,1,1,3,1,42195,3,12649,65,0,3.335836825045458,3.513611111111111 +32,26.16460609,1,1,3,1,42195,3,19200,45,1,2.19765625,5.333333333333333 +42,28.2599144,0,2,3,1,42195,2,21180,40,0,1.9922096317280453,5.883333333333334 +29,31.14186668,0,2,3,1,42195,2,13560,50,0,3.1117256637168142,3.7666666666666666 +37,24.22145271,0,2,1,2,42195,4,10558,65,0,3.996495548399318,2.932777777777778 +44,18.87354088,1,1,1,1,42195,2,10077,104,1,4.187258112533492,2.7991666666666664 +27,21.30681801,1,2,2,2,42195,3,15965,35,0,2.6429689946758534,4.434722222222222 +33,19.88636398,1,2,2,1,42195,2,11341,60,0,3.7205713781853453,3.150277777777778 +41,23.74960518,0,2,3,1,42195,3,13680,50,0,3.0844298245614037,3.8 +29,22.26345062,0,2,2,2,42195,4,10853,65,1,3.8878651064221876,3.0147222222222223 +33,24.20903206,0,2,1,1,42195,2,12060,58,1,3.4987562189054726,3.35 +34,23.04032135,0,2,2,2,42195,4,12197,60,1,3.459457243584488,3.3880555555555554 +30,21.25563812,1,1,1,1,42195,4,13020,40,1,3.2407834101382487,3.6166666666666667 +58,21.04377174,0,2,3,2,42195,2,10936,53,1,3.8583577176298465,3.037777777777778 +31,23.00573349,1,2,3,1,42195,2,12731,35,1,3.3143507972665147,3.536388888888889 +32,20.03710556,0,2,1,1,42195,4,11031,30,0,3.8251291813978785,3.0641666666666665 +42,24.6258564,0,1,3,1,42195,4,11043,58,1,3.8209725618038575,3.0675000000000003 +37,20.62209892,0,2,1,1,42195,4,10512,80,1,4.01398401826484,2.92 +37,22.14966202,0,2,3,1,42195,3,10576,100,1,3.989693645990923,2.937777777777778 +34,24.44100571,0,2,2,3,42195,2,10487,70,1,4.023552970344236,2.9130555555555557 +37,24.52615929,0,2,3,1,42195,3,11262,75,0,3.7466702184336707,3.128333333333333 +40,23.00556564,0,2,3,1,42195,3,14940,48,1,2.82429718875502,4.15 +21,21.24975014,0,2,3,2,42195,4,12300,45,0,3.430487804878049,3.4166666666666665 
+28,23.56902504,0,2,2,2,42195,2,13680,60,0,3.0844298245614037,3.8 +22,25.68033028,0,2,3,1,42195,4,16330,23,0,2.583894672382119,4.536111111111111 +27,25.23191071,1,2,2,2,42195,2,14700,45,0,2.870408163265306,4.083333333333333 +30,23.37423897,1,2,3,1,42195,4,13530,60,0,3.1186252771618626,3.7583333333333333 +29,25.16514397,0,2,2,2,42195,2,12508,60,0,3.3734409977614326,3.4744444444444444 +25,22.51829529,0,1,3,1,42195,3,8447,125,1,4.995264590979046,2.346388888888889 +30,23.91919327,0,1,1,1,42195,4,13392,40,0,3.1507616487455197,3.7199999999999998 +35,20.24147606,1,2,1,1,42195,2,10610,65,0,3.976908576814326,2.9472222222222224 +32,19.37938118,0,3,2,1,42195,3,12158,35,0,3.4705543674946537,3.377222222222222 +35,22.91344261,0,1,2,2,42195,4,11100,80,1,3.8013513513513515,3.0833333333333335 +41,19.74053955,1,2,3,1,42195,3,12713,70,0,3.3190434987807755,3.531388888888889 +29,27.4961338,1,2,1,1,42195,3,18180,30,0,2.320957095709571,5.05 +34,25.25252724,0,2,2,1,42195,4,10550,65,1,3.999526066350711,2.930555555555556 +31,22.31223869,0,2,1,1,42195,4,12084,59,1,3.4918073485600796,3.356666666666667 +41,26.7320137,0,2,1,1,42195,4,11294,64,1,3.736054542234815,3.137222222222222 +29,25.20478821,0,2,2,3,42195,4,13080,40,0,3.2259174311926606,3.6333333333333333 +24,22.36208534,0,2,3,2,42195,4,15042,35,0,2.805145592341444,4.178333333333333 +36,21.38588142,1,1,1,1,42195,2,17310,40,0,2.4376083188908146,4.808333333333334 +35,29.21779823,1,2,3,3,42195,2,19545,26,1,2.1588641596316194,5.429166666666666 +26,21.46464729,0,2,2,1,42195,4,11301,62,0,3.7337403769577913,3.1391666666666667 +60,22.44668961,0,2,1,1,42195,2,13890,40,1,3.037796976241901,3.8583333333333334 +35,22.91344261,0,1,2,2,42195,3,12900,40,1,3.2709302325581397,3.5833333333333335 +41,23.67722511,0,2,1,1,42195,4,11100,60,1,3.8013513513513515,3.0833333333333335 +26,22.19460106,1,2,2,1,42195,3,15600,37,1,2.7048076923076922,4.333333333333333 +49,23.45083618,0,2,3,1,42195,3,11936,50,1,3.535103887399464,3.3155555555555556 
+36,28.40909004,1,2,3,2,42195,3,19827,43,0,2.128158571644727,5.507499999999999 +37,20.55757523,0,1,3,1,42195,4,9468,90,1,4.4565906210392905,2.6300000000000003 +28,22.3776226,1,2,1,2,42195,3,15430,30,1,2.7346079066753077,4.286111111111111 +37,23.92344666,0,1,1,1,42195,3,12660,35,0,3.3329383886255926,3.5166666666666666 +23,22.3776226,1,2,2,1,42195,4,14460,50,0,2.91804979253112,4.016666666666667 +43,23.15398788,0,1,1,1,42195,2,15120,45,1,2.7906746031746033,4.2 +28,20.44668007,0,2,1,1,42195,3,8966,115,0,4.706111978585769,2.4905555555555554 +36,22.16610718,0,2,3,1,42195,4,10752,60,1,3.9243861607142856,2.9866666666666664 +39,21.91380501,0,1,1,1,42195,4,12574,60,0,3.355734054397964,3.4927777777777775 +55,22.51829529,0,2,3,1,42195,2,12752,38,0,3.308892722710163,3.542222222222222 +29,22.22222137,1,2,1,1,42195,2,11276,73,0,3.7420184462575383,3.1322222222222225 +32,18.39954567,0,2,2,3,42195,2,9848,70,1,4.284626320064988,2.7355555555555555 +30,24.20903206,0,3,1,1,42195,3,20591,35,0,2.04919625078918,5.719722222222222 +37,22.90478325,1,2,1,1,42195,2,11627,51,1,3.629053066139159,3.229722222222222 +41,21.86121559,1,2,1,1,42195,3,13322,67,1,3.167317219636691,3.7005555555555554 +33,18.03393936,0,2,3,1,42195,3,11009,65,1,3.832773185575438,3.0580555555555553 +38,33.73579407,1,2,1,1,42195,2,16258,40,1,2.5953376799114283,4.516111111111111 +28,23.29388237,0,2,1,1,42195,4,10839,65,0,3.8928867976750623,3.0108333333333333 +57,25.5287571,0,1,3,1,42195,2,13155,64,1,3.2075256556442415,3.654166666666667 +24,22.3776226,1,2,3,1,42195,2,16000,44,0,2.6371875,4.444444444444445 +45,20.81165504,1,2,1,2,42195,2,13620,40,0,3.0980176211453743,3.783333333333333 +25,23.08344269,0,1,1,2,42195,3,14160,50,1,2.979872881355932,3.933333333333333 +55,20.25152397,0,1,1,2,42195,2,10984,60,0,3.8414967225054624,3.051111111111111 +33,25.96887398,0,2,2,1,42195,3,12510,47,1,3.3729016786570742,3.475 +27,27.5420742,1,2,1,3,42195,2,20967,40,0,2.012448132780083,5.824166666666667 
+37,28.05836296,1,3,1,3,42195,3,19326,40,1,2.183328158956846,5.368333333333334 +47,24.57002449,0,2,3,2,42195,4,12640,55,0,3.3382120253164556,3.511111111111111 +32,24.70283508,0,1,3,3,42195,2,11296,42,1,3.735393059490085,3.137777777777778 +36,20.86985588,1,1,2,3,42195,3,12115,63,1,3.4828724721419726,3.3652777777777776 +32,21.37073326,1,2,1,1,42195,4,12795,74,1,3.2977725674091443,3.5541666666666667 +39,21.86121559,1,2,1,1,42195,2,14400,40,0,2.9302083333333333,4 +60,22.14966202,0,2,3,1,42195,4,14460,56,0,2.91804979253112,4.016666666666667 +24,21.30681801,1,1,1,3,42195,4,13305,40,1,3.1713641488162345,3.6958333333333333 +32,22.3776226,1,2,1,1,42195,3,12971,80,1,3.253025981034616,3.6030555555555557 +47,25.97402573,0,1,1,1,42195,2,12466,55,1,3.384806674153698,3.462777777777778 +51,23.33714104,0,2,1,1,42195,4,14709,55,1,2.8686518458086887,4.085833333333333 +41,26.37949944,1,1,2,1,42195,4,19800,25,0,2.131060606060606,5.5 +50,23.00573349,0,1,2,2,42195,3,10563,62,1,3.9946038057370066,2.934166666666667 +64,24.37873268,0,2,3,1,42195,2,13397,62,0,3.149585728148093,3.721388888888889 +31,18.69939232,1,2,1,1,42195,3,16497,56,0,2.557737770503728,4.5825 +29,23.82097435,1,1,3,2,42195,2,11646,60,1,3.623132405976301,3.235 +51,31.84507942,0,2,2,1,42195,2,16330,30,0,2.583894672382119,4.536111111111111 +39,22.70872116,0,2,1,2,42195,3,12936,51,0,3.26182745825603,3.5933333333333333 +24,23.5923233,0,1,3,3,42195,2,9937,74,1,4.24625138371742,2.760277777777778 +53,23.14814949,0,2,2,1,42195,2,12220,55,1,3.452945990180033,3.3944444444444444 +46,22.86756706,0,2,2,2,42195,4,10309,90,0,4.093025511688816,2.863611111111111 +27,24.09897804,1,1,2,2,42195,3,13860,45,0,3.044372294372294,3.85 +33,21.5213356,1,2,1,1,42195,4,15780,40,0,2.673954372623574,4.383333333333334 +29,24.20903206,1,1,3,1,42195,2,14131,75,1,2.9859882527775814,3.925277777777778 +40,23.00556564,0,2,3,1,42195,3,11100,70,1,3.8013513513513515,3.0833333333333335 +37,19.55280685,0,2,3,1,42195,4,10834,84,1,3.8946834040982092,3.0094444444444446 
+52,18.95542717,1,2,3,1,42195,3,17283,55,0,2.4414164207602846,4.800833333333333 +30,25.11189079,0,1,2,1,42195,2,11280,50,1,3.7406914893617023,3.1333333333333333 +33,20.48413086,1,2,1,1,42195,3,12837,56,0,3.286982939939238,3.565833333333333 +44,26.73796654,0,2,1,2,42195,3,14760,24,0,2.858739837398374,4.1 +33,23.20066833,0,2,3,1,42195,4,10229,87,1,4.12503666047512,2.8413888888888885 +40,20.41903305,1,2,3,1,42195,4,13110,70,0,3.2185354691075516,3.6416666666666666 +38,22.14966202,0,2,1,1,42195,4,10751,60,1,3.924751185936192,2.986388888888889 +38,20.19000053,1,2,2,2,42195,3,11226,64,1,3.7586851950828435,3.118333333333333 +31,22.7912426,0,1,2,1,42195,2,10395,72,1,4.059163059163059,2.8875 +42,31.72218323,1,2,3,2,42195,3,22860,35,0,1.8458005249343832,6.35 +47,23.70931816,0,2,1,3,42195,2,14122,33,0,2.987891233536326,3.9227777777777777 +34,19.85831833,1,2,1,1,42195,3,17700,50,0,2.3838983050847458,4.916666666666667 +34,22.4551754,0,2,3,1,42195,3,9696,20,1,4.351794554455446,2.6933333333333334 +47,25.92195129,0,1,2,2,42195,2,11968,50,1,3.5256517379679146,3.3244444444444445 +34,24.0420742,1,2,1,3,42195,3,17760,51,0,2.3758445945945947,4.933333333333334 +42,19.47665596,1,2,3,1,42195,3,13800,50,0,3.0576086956521737,3.8333333333333335 +42,27.4961338,0,2,1,1,42195,2,16020,30,0,2.6338951310861423,4.45 +31,22.47685242,1,2,3,2,42195,3,13610,42,0,3.100293901542983,3.780555555555556 +25,23.37423897,1,2,1,1,42195,3,12900,24,1,3.2709302325581397,3.5833333333333335 +30,23.88304138,0,2,1,1,42195,3,11700,32,0,3.6064102564102565,3.25 +32,17.75568008,1,1,2,3,42195,3,10923,60,1,3.8629497390826697,3.0341666666666667 +28,22.0347538,0,2,1,1,42195,4,17185,32,1,2.455338958393948,4.773611111111111 +27,21.20791435,0,2,2,2,42195,2,13747,45,0,3.0693969593365824,3.818611111111111 +45,23.64034653,0,1,1,1,42195,4,10595,110,1,3.9825389334591788,2.9430555555555555 +46,24.96845627,0,1,3,1,42195,4,14199,45,1,2.9716881470526095,3.944166666666667 
+28,25.60000038,0,2,2,1,42195,3,15324,45,0,2.7535238841033673,4.256666666666667 +43,19.6209259,0,1,2,1,42195,3,11642,90,1,3.624377254767222,3.233888888888889 +51,21.74523163,0,2,1,2,42195,4,10782,80,1,3.9134668892598774,2.9949999999999997 +45,23.80480194,0,2,2,1,42195,4,12662,60,1,3.33241194124151,3.5172222222222222 +51,23.45083618,0,2,3,1,42195,3,12504,56,1,3.3745201535508635,3.4733333333333336 +38,28.2003727,0,2,1,2,42195,4,11880,45,0,3.551767676767677,3.3 +34,23.00556564,0,2,3,1,42195,4,10750,75,0,3.9251162790697673,2.986111111111111 +28,23.98990059,0,2,3,1,42195,4,11130,53,1,3.7911051212938007,3.091666666666667 +45,19.51649475,0,2,2,1,42195,2,13423,45,0,3.14348506295165,3.7286111111111113 +43,19.88636398,1,2,2,1,42195,3,12909,50,1,3.2686497792237974,3.5858333333333334 +34,24.44100571,0,2,3,3,42195,3,10710,65,1,3.939775910364146,2.975 +30,21.5213356,1,2,3,1,42195,4,19618,30,1,2.150830869609542,5.449444444444444 +45,20.33395195,0,2,2,3,42195,2,9930,82,1,4.2492447129909365,2.7583333333333333 +33,22.08595085,1,1,3,3,42195,3,12867,40,0,3.2793191886220563,3.5741666666666663 +47,21.83592224,0,2,2,1,42195,4,10747,86,1,3.926211966130083,2.9852777777777777 +43,23.14814949,0,2,1,1,42195,3,10200,45,0,4.136764705882353,2.8333333333333335 +38,19.04384041,0,2,1,1,42195,4,13069,75,1,3.228632642130232,3.6302777777777777 +52,18.46590805,1,2,2,1,42195,2,15120,45,1,2.7906746031746033,4.2 +41,31.45643044,0,2,3,2,42195,2,17623,42,0,2.3943142484253532,4.895277777777777 +31,23.64955711,1,2,1,1,42195,4,13230,50,1,3.189342403628118,3.675 +33,25.96857071,0,2,3,2,42195,3,12540,40,0,3.3648325358851676,3.4833333333333334 +70,20.48413086,1,2,2,2,42195,3,16691,48,1,2.5280091067042116,4.636388888888889 +26,25.04382706,0,2,2,1,42195,4,12000,40,0,3.51625,3.3333333333333335 +70,26.42340279,0,1,3,2,42195,3,17340,35,1,2.4333910034602075,4.816666666666666 +47,22.16610718,0,2,2,1,42195,3,12265,55,1,3.44027721157766,3.406944444444444 
+47,18.32382584,1,2,1,2,42195,4,12184,80,0,3.4631483913328958,3.3844444444444446 +35,21.94683647,1,2,1,3,42195,2,17870,30,0,2.3612199216564074,4.963888888888889 +28,18.75529671,0,1,2,1,42195,4,9742,75,1,4.331246150687744,2.7061111111111114 +33,22.0466156,0,2,3,1,42195,3,11055,35,1,3.8168249660786975,3.0708333333333333 +35,27.54821014,1,2,2,1,42195,3,22186,24,1,1.9018750563418372,6.1627777777777775 +31,19.97297096,1,2,3,2,42195,4,14307,42,0,2.949255609142378,3.9741666666666666 +48,20.41903305,1,2,3,3,42195,3,11777,56,1,3.582830941665959,3.2713888888888887 +43,18.28758621,0,1,3,2,42195,2,15243,49,0,2.76815587482779,4.234166666666667 +42,22.03365898,1,2,2,1,42195,2,12619,38,1,3.3437673349710755,3.5052777777777777 +56,24.48979568,0,2,1,1,42195,4,16440,35,1,2.5666058394160585,4.566666666666666 +60,20.92764473,0,2,3,1,42195,2,17880,40,0,2.3598993288590604,4.966666666666667 +60,27.55586243,0,1,2,1,42195,3,17520,33,0,2.408390410958904,4.866666666666666 +35,22.19460106,1,2,2,1,42195,3,13127,50,1,3.2143673345014094,3.6463888888888887 +40,23.84960938,0,1,2,1,42195,3,11100,55,1,3.8013513513513515,3.0833333333333335 +44,24.5955925,1,2,3,1,42195,2,14400,40,1,2.9302083333333333,4 +30,21.30681801,1,2,2,2,42195,3,14043,34,1,3.0046998504593034,3.9008333333333334 +31,24.55106735,0,2,2,3,42195,4,13321,50,0,3.167554988364237,3.700277777777778 +30,24.18745041,1,2,2,1,42195,2,11940,65,1,3.5339195979899496,3.316666666666667 +27,20.91937065,0,2,1,1,42195,3,9665,85,0,4.365752715985515,2.6847222222222222 +45,25.79951668,1,2,2,1,42195,3,17340,34,0,2.4333910034602075,4.816666666666666 +43,22.50635719,0,2,1,1,42195,4,12274,52,1,3.437754603226332,3.4094444444444445 +47,25.04382706,1,2,1,1,42195,3,15660,40,0,2.6944444444444446,4.35 +28,24.04452515,0,2,1,1,42195,3,13378,41,1,3.1540589026760353,3.716111111111111 +39,26.41929436,0,1,1,1,42195,4,13520,55,0,3.1209319526627217,3.7555555555555555 +45,23.5923233,0,2,1,1,42195,3,11040,55,0,3.8220108695652173,3.066666666666667 
+30,22.44668961,0,2,1,2,42195,3,13200,30,1,3.1965909090909093,3.6666666666666665 +43,25.97402573,0,2,1,3,42195,3,14220,36,0,2.967299578059072,3.95 +34,23.37423897,1,2,3,2,42195,2,12070,60,0,3.495857497928749,3.3527777777777774 +29,20.522686,1,2,2,1,42195,2,13326,31,0,3.1663665015758666,3.7016666666666667 +30,22.51829529,0,1,3,1,42195,3,13440,60,0,3.1395089285714284,3.7333333333333334 +27,20.15620995,1,2,3,1,42195,2,13328,42,0,3.165891356542617,3.7022222222222223 +35,25.27348328,0,2,2,1,42195,2,14354,35,1,2.939598718127351,3.987222222222222 +48,22.4551754,1,2,1,3,42195,2,18667,14,1,2.2604060641774253,5.185277777777778 +31,22.76068687,0,2,1,1,42195,3,12300,50,0,3.430487804878049,3.4166666666666665 +41,26.7320137,0,2,1,3,42195,2,13032,22,0,3.2377992633517496,3.6199999999999997 +28,21.49645615,0,2,1,2,42195,3,9363,71,1,4.506568407561679,2.6008333333333336 +53,23.37423897,0,2,3,1,42195,2,13321,47,0,3.167554988364237,3.700277777777778 +23,22.91344261,0,2,3,2,42195,2,17587,20,0,2.399215329504748,4.885277777777778 +33,22.35768318,0,1,2,2,42195,3,11467,78,0,3.6796895439086073,3.185277777777778 +44,20.86985588,1,2,1,1,42195,4,15458,50,0,2.7296545478069607,4.293888888888889 +32,24.48979568,0,2,3,1,42195,2,9787,83,0,4.311331357923777,2.718611111111111 +45,24.09897804,1,2,1,3,42195,2,17634,40,0,2.392820687308608,4.8983333333333325 +42,24.18745041,1,3,2,2,42195,2,17171,44,0,2.4573408654126143,4.769722222222223 +36,30.29488754,1,2,3,1,42195,3,16585,40,0,2.5441664154356345,4.606944444444445 +31,21.81818199,1,2,3,1,42195,4,14526,30,0,2.904791408508881,4.035 +34,21.74523163,0,2,2,1,42195,3,12180,50,0,3.4642857142857144,3.3833333333333333 +34,23.08344269,0,1,3,1,42195,2,12484,50,1,3.379926305671259,3.4677777777777776 +46,20.86985588,1,2,1,1,42195,2,15600,50,1,2.7048076923076922,4.333333333333333 +30,21.23309135,1,2,2,3,42195,3,15780,43,0,2.673954372623574,4.383333333333334 +37,23.27125931,1,2,2,2,42195,4,12826,50,1,3.289801964759083,3.562777777777778 
+35,21.70465088,1,2,1,1,42195,3,13484,50,0,3.12926431326016,3.7455555555555553 +41,22.47685242,1,2,2,2,42195,4,17280,40,0,2.4418402777777777,4.8 +34,23.67722511,0,2,2,1,42195,3,13028,55,0,3.2387933681301813,3.618888888888889 +33,20.321558,0,2,1,1,42195,4,13450,50,0,3.137174721189591,3.736111111111111 +35,22.60793114,0,1,1,1,42195,4,9335,72,1,4.520085698982324,2.593055555555556 +26,19.79558945,1,2,2,1,42195,2,13523,48,1,3.120239591806552,3.7563888888888886 +40,25.37774658,0,2,2,1,42195,4,11691,55,1,3.609186553759302,3.2475 +41,22.90478325,1,2,1,1,42195,2,14940,35,1,2.82429718875502,4.15 +29,24.52615929,0,2,3,2,42195,2,14761,45,1,2.8585461689587426,4.100277777777778 +34,20.58569527,0,2,3,1,42195,2,14280,45,0,2.9548319327731094,3.966666666666667 +46,22.176784519999998,0,3,1,1,42195,2,12702,37,0,3.3219178082191783,3.5283333333333333 +33,21.49645615,0,2,1,1,42195,2,10527,52,0,4.008264462809917,2.9241666666666664 +25,22.19460106,1,1,2,1,42195,3,12495,38,1,3.376950780312125,3.470833333333333 +34,22.53944397,0,2,1,1,42195,2,11225,43,1,3.75902004454343,3.118055555555556 +34,21.74523163,1,2,1,2,42195,3,14050,50,1,3.003202846975089,3.9027777777777777 +35,30.5534687,0,2,1,2,42195,3,14264,17,0,2.9581463825014023,3.962222222222222 +45,25.24751663,0,2,1,1,42195,2,12882,40,1,3.275500698649278,3.578333333333333 +32,22.19567108,0,1,2,1,42195,2,10764,102,1,3.920011148272018,2.99 +36,23.16774559,1,2,2,1,42195,2,17101,60,1,2.4673995672767672,4.750277777777778 +36,25.6369915,0,2,2,2,42195,4,11580,55,0,3.643782383419689,3.216666666666667 +32,23.30609322,0,1,2,2,42195,3,15310,48,1,2.756041802743305,4.252777777777777 +35,24.37873268,1,2,1,2,42195,2,21290,37,1,1.9819163926726162,5.913888888888889 +39,22.44668961,0,2,3,3,42195,3,14161,50,0,2.979662453216581,3.9336111111111114 +32,23.78989792,0,2,3,1,42195,2,11384,75,1,3.7065179198875615,3.162222222222222 +28,17.5306797,1,1,1,2,42195,3,12180,60,0,3.4642857142857144,3.3833333333333333 
+27,25.74573708,0,1,1,1,42195,4,11058,45,1,3.8157894736842106,3.0716666666666668 +24,20.77922058,0,2,3,2,42195,2,11951,50,1,3.530666889800017,3.3197222222222225 +24,20.77922058,0,2,3,1,42195,4,10448,55,1,4.038571975497703,2.902222222222222 +41,24.37873268,0,2,1,1,42195,4,14130,21,0,2.98619957537155,3.925 +28,20.9185257,0,1,1,2,42195,2,10717,63,1,3.9372025753475786,2.9769444444444444 +30,21.53769112,1,2,2,1,42195,4,12499,47,1,3.3758700696055683,3.4719444444444445 +45,23.49176788,0,2,1,2,42195,2,19200,40,1,2.19765625,5.333333333333333 +38,25.23191071,0,1,2,2,42195,4,11380,61,0,3.7078207381370825,3.161111111111111 +29,17.86459732,1,2,2,1,42195,2,14238,50,1,2.9635482511588704,3.955 +32,23.70815659,0,2,3,3,42195,3,10604,40,1,3.979158807996982,2.9455555555555555 +32,21.75764084,1,2,2,2,42195,3,12338,45,1,3.419922191603177,3.427222222222222 +34,18.36547279,1,2,3,3,42195,3,11040,70,1,3.8220108695652173,3.066666666666667 +28,21.59191895,0,2,3,1,42195,4,12180,57,1,3.4642857142857144,3.3833333333333333 +37,19.86567116,1,1,1,1,42195,3,11426,60,1,3.6928934010152283,3.173888888888889 +35,22.176784519999998,0,2,3,1,42195,3,10994,65,1,3.838002546843733,3.053888888888889 +33,25.82033348,0,2,3,1,42195,3,12581,18,0,3.3538669422144505,3.4947222222222223 +31,21.15354919,0,2,1,1,42195,3,10391,85,1,4.060725627947262,2.886388888888889 +43,25.95398712,0,2,1,1,42195,3,14820,65,0,2.847165991902834,4.116666666666666 +26,25.16318321,1,2,1,2,42195,3,11744,48,1,3.5928985013623977,3.262222222222222 +44,23.08344269,0,1,2,1,42195,2,12798,52,1,3.296999531176746,3.555 +24,17.87248611,0,2,1,1,42195,3,10577,75,1,3.989316441334972,2.9380555555555556 +36,26.61252975,0,1,2,1,42195,2,13560,20,0,3.1117256637168142,3.7666666666666666 +29,21.68908119,1,2,2,1,42195,2,14004,40,1,3.0130676949443016,3.89 +27,22.01950264,0,2,3,2,42195,3,12312,60,1,3.4271442495126707,3.42 +53,22.41249084,0,2,2,1,42195,3,13289,48,1,3.1751824817518246,3.6913888888888886 
+33,22.44668961,0,2,2,1,42195,4,12480,52,0,3.3810096153846154,3.466666666666667 +21,21.97979736,0,2,2,1,42195,3,11580,50,1,3.643782383419689,3.216666666666667 +29,23.59925842,0,1,2,1,42195,2,10782,68,1,3.9134668892598774,2.9949999999999997 +28,25.23191071,0,1,3,1,42195,2,12964,60,0,3.2547824745448937,3.601111111111111 +32,31.38913918,0,2,1,1,42195,2,14896,60,1,2.8326396348012888,4.137777777777778 +46,25.96857071,0,2,2,1,42195,3,11433,60,1,3.6906323799527683,3.1758333333333337 +32,22.0347538,0,2,1,1,42195,4,14460,35,0,2.91804979253112,4.016666666666667 +30,20.98412895,0,2,2,2,42195,3,13200,31,1,3.1965909090909093,3.6666666666666665 +41,23.67722511,0,2,1,1,42195,3,15160,50,1,2.783311345646438,4.211111111111111 +33,24.50284004,1,2,2,1,42195,3,14822,40,0,2.8467818108217515,4.117222222222222 +44,23.74768066,0,1,1,1,42195,4,11680,50,1,3.612585616438356,3.2444444444444445 +35,23.82097435,1,2,1,2,42195,3,18010,40,0,2.3428650749583566,5.002777777777778 +34,25.53605843,0,2,3,2,42195,3,12900,55,1,3.2709302325581397,3.5833333333333335 +31,23.80480194,1,2,3,1,42195,3,14329,62,1,2.9447274757484823,3.980277777777778 +45,23.87511635,0,2,3,2,42195,3,12007,55,1,3.5142000499708503,3.335277777777778 +28,22.93917274,0,2,1,1,42195,3,15960,40,0,2.643796992481203,4.433333333333334 +25,23.64960861,1,1,2,1,42195,2,12355,65,1,3.415216511533792,3.4319444444444445 +35,23.24191475,0,2,1,1,42195,4,13260,50,0,3.182126696832579,3.683333333333333 +29,24.1929512,0,2,2,3,42195,4,12065,55,1,3.4973062577704104,3.351388888888889 +28,23.37423897,1,2,3,2,42195,3,13719,40,0,3.0756614913623443,3.8108333333333335 +44,23.56902504,0,1,3,1,42195,3,13779,55,1,3.062268669714783,3.8275 +45,28.55020332,0,2,2,3,42195,3,19740,55,0,2.1375379939209727,5.483333333333333 +43,23.90596962,0,1,3,2,42195,2,13242,62,1,3.1864521975532396,3.6783333333333332 +30,46.90821457,1,2,3,1,42195,3,18856,26,0,2.2377492575307594,5.237777777777778 +31,21.30681801,1,2,1,1,42195,2,14401,40,1,2.9300048607735576,4.000277777777778 
+45,25.93618584,0,1,2,1,42195,3,12305,45,1,3.429093864282812,3.4180555555555556 +39,24.85795403,1,2,1,3,42195,3,13350,54,1,3.160674157303371,3.7083333333333335 +61,23.4375,0,2,2,1,42195,3,13811,65,0,3.0551734124972847,3.836388888888889 +39,23.74768066,0,2,2,1,42195,3,14400,50,1,2.9302083333333333,4 +30,21.86121559,1,2,3,2,42195,2,14040,38,0,3.0053418803418803,3.9 +38,19.6476059,0,2,1,1,42195,4,9888,65,1,4.267293689320389,2.746666666666667 +47,21.5194149,1,2,2,2,42195,3,17778,32,0,2.373439082011475,4.9383333333333335 +41,25.97402573,0,2,3,2,42195,3,14820,42,0,2.847165991902834,4.116666666666666 +39,27.1192646,1,2,2,1,42195,2,15420,40,1,2.7363813229571985,4.283333333333333 +27,25.89813232,0,1,3,2,42195,4,14278,45,0,2.955245832749685,3.966111111111111 +30,22.01950264,0,2,1,1,42195,3,12315,32,0,3.4263093788063337,3.4208333333333334 +36,23.82097435,1,2,3,1,42195,3,18120,40,0,2.3286423841059603,5.033333333333333 +57,23.98273849,0,2,2,1,42195,2,13067,68,0,3.229126807989592,3.629722222222222 +38,25.8121376,0,2,1,1,42195,4,14370,40,1,2.9363256784968685,3.9916666666666667 +26,26.37949944,1,2,1,1,42195,3,13874,50,0,3.041300273893614,3.8538888888888887 +41,27.45825768,0,2,3,1,42195,3,15570,30,0,2.710019267822736,4.325 +42,26.40168762,0,1,2,1,42195,3,12896,55,0,3.271944789081886,3.582222222222222 +42,19.9094696,0,1,2,1,42195,4,11311,55,1,3.730439395278932,3.141944444444445 +41,21.85183334,1,1,2,2,42195,3,15362,40,0,2.7467126676214035,4.267222222222222 +54,25.96887398,0,2,1,1,42195,3,15390,33,1,2.7417153996101367,4.275 +30,23.64960861,1,2,3,1,42195,2,15601,35,1,2.7046343183129284,4.333611111111111 +33,19.70556259,0,2,2,1,42195,4,9279,80,1,4.547365017782089,2.5775 +44,23.84960938,0,1,3,1,42195,2,11544,55,1,3.6551455301455302,3.2066666666666666 +39,20.88916206,1,1,1,1,42195,3,13110,55,1,3.2185354691075516,3.6416666666666666 +30,24.83164978,0,2,1,2,42195,4,12616,30,0,3.344562460367787,3.5044444444444447 +51,23.49176788,1,1,3,2,42195,2,13504,60,1,3.124629739336493,3.751111111111111 
+40,20.3689785,1,2,3,2,42195,3,16097,45,0,2.6212958936447786,4.47138888888889 +66,21.5213356,0,1,2,1,42195,3,13332,55,0,3.164941494149415,3.703333333333333 +50,24.02800179,1,2,1,1,42195,3,14362,33,1,2.937961286728868,3.9894444444444446 +43,21.15354919,0,1,2,1,42195,3,18066,35,0,2.3356027897708405,5.0183333333333335 +35,24.48979568,0,2,3,2,42195,3,10788,58,1,3.911290322580645,2.996666666666667 +42,23.83880615,1,2,2,1,42195,2,14091,30,1,2.994464551841601,3.9141666666666666 +45,20.91937065,0,2,3,2,42195,3,10080,70,1,4.186011904761905,2.8 +52,24.85795403,1,2,1,1,42195,3,16200,65,0,2.60462962962963,4.5 +33,22.91344261,0,2,2,1,42195,4,10769,65,1,3.918191104095088,2.991388888888889 +33,20.58569527,0,2,3,1,42195,2,10491,50,1,4.022018873319989,2.9141666666666666 +52,26.16460609,0,1,2,1,42195,3,14406,45,0,2.928987921699292,4.001666666666667 +32,22.51829529,0,2,2,1,42195,3,10968,82,1,3.8471006564551424,3.046666666666667 +34,22.53944397,1,2,2,1,42195,2,12537,62,0,3.3656377123713805,3.4825 +41,22.02581596,0,2,2,1,42195,2,11752,44,1,3.5904526889040165,3.2644444444444445 +34,23.88304138,0,1,2,1,42195,4,12525,57,1,3.3688622754491018,3.4791666666666665 +33,25.24751663,0,1,3,1,42195,3,12344,40,0,3.4182598833441347,3.428888888888889 +57,21.85050583,0,2,1,2,42195,2,14282,50,0,2.9544181487186667,3.967222222222222 +37,23.51625824,0,2,3,1,42195,3,15851,34,1,2.6619771623241437,4.4030555555555555 +54,20.44668007,1,2,2,3,42195,2,14708,42,1,2.868846886048409,4.085555555555556 +58,23.82998085,0,2,2,2,42195,4,12420,58,1,3.3973429951690823,3.45 +37,27.07126808,0,2,3,2,42195,3,13412,45,1,3.1460632269609303,3.7255555555555557 +56,30.86419868,0,1,2,2,42195,2,19980,40,0,2.1118618618618616,5.55 +37,18.83348274,0,1,2,1,42195,2,8290,140,1,5.089867310012063,2.3027777777777776 +31,25.87862206,1,2,2,1,42195,2,15591,50,1,2.706369059072542,4.3308333333333335 +42,28.05194664,0,2,3,2,42195,3,14480,35,0,2.914019337016575,4.022222222222222 
+29,20.95170403,1,2,3,3,42195,4,11972,50,1,3.5244737721349817,3.3255555555555554 +32,22.2054863,1,2,2,1,42195,3,13149,45,1,3.2089892767510837,3.6525000000000003 +31,29.0236969,0,2,2,3,42195,3,14598,20,1,2.890464447184546,4.055000000000001 +44,25.16514397,0,2,2,3,42195,4,13628,35,1,3.0961990020545933,3.7855555555555553 +34,19.04384041,0,1,3,1,42195,3,10377,100,0,4.0662041052327265,2.8825 +40,23.5923233,0,2,3,3,42195,3,11970,50,1,3.525062656641604,3.325 +25,22.03365898,1,2,1,1,42195,3,15301,53,0,2.757662897849814,4.250277777777778 +28,23.64960861,1,2,1,1,42195,4,16586,45,0,2.5440130230314724,4.607222222222222 +23,21.40518761,0,2,3,1,42195,4,11989,60,1,3.5194761865042956,3.330277777777778 +48,21.46464729,0,2,1,1,42195,3,12477,35,0,3.381822553498437,3.4658333333333333 +36,24.44100571,0,2,3,1,42195,4,13022,50,0,3.240285670403932,3.6172222222222223 +49,20.9185257,1,2,2,2,42195,4,14506,52,0,2.908796360126844,4.029444444444445 +26,27.19061852,0,2,2,2,42195,3,15360,35,0,2.7470703125,4.266666666666667 +32,24.0484848,0,2,2,1,42195,2,14127,45,0,2.9868337226587385,3.9241666666666664 +34,26.93602943,0,2,1,2,42195,2,9694,72,1,4.352692387043532,2.6927777777777777 +57,23.74768066,0,2,3,1,42195,2,17100,45,0,2.4675438596491226,4.75 +27,23.14814949,0,2,2,2,42195,4,11971,40,0,3.524768189791997,3.325277777777778 +33,24.8768692,0,1,3,1,42195,2,15338,35,0,2.7510105620028686,4.260555555555555 +44,24.32528305,1,2,2,1,42195,2,12543,60,1,3.364027744558718,3.484166666666667 +30,18.53243256,1,2,2,1,42195,2,14119,41,1,2.9885260995821232,3.9219444444444442 +31,23.74768066,1,2,3,2,42195,4,19343,32,0,2.181409295352324,5.373055555555555 +30,25.97402573,0,2,1,1,42195,3,11310,45,0,3.730769230769231,3.1416666666666666 +33,21.89049911,1,2,2,1,42195,2,12862,61,1,3.2805939978230447,3.572777777777778 +30,24.93506622,1,2,3,1,42195,2,16700,42,1,2.5266467065868263,4.638888888888888 +36,24.55106735,0,2,1,1,42195,2,11580,56,1,3.643782383419689,3.216666666666667 
+49,20.91937065,1,1,1,1,42195,3,17520,38,0,2.408390410958904,4.866666666666666 +42,24.85058212,0,2,3,2,42195,3,13630,60,1,3.095744680851064,3.786111111111111 +32,22.72727203,0,3,1,1,42195,2,15000,30,0,2.813,4.166666666666667 +36,25.54157829,1,2,2,2,42195,3,15338,60,0,2.7510105620028686,4.260555555555555 +27,20.92425156,1,2,3,2,42195,3,13170,50,0,3.203872437357631,3.658333333333333 +30,22.01950264,1,2,1,1,42195,3,12337,55,1,3.4201994001783254,3.4269444444444446 +42,27.4961338,0,2,3,2,42195,3,13195,42,1,3.197802197802198,3.6652777777777774 +62,24.56541252,0,2,1,1,42195,3,17607,42,0,2.396490032373488,4.890833333333333 +31,19.97483444,1,2,1,1,42195,2,11302,80,0,3.7334100159263848,3.1394444444444445 +46,21.89049911,1,2,2,1,42195,3,13812,35,1,3.054952215464813,3.8366666666666664 +45,22.36208534,0,2,3,2,42195,2,11970,55,1,3.525062656641604,3.325 +28,21.60493851,0,2,1,1,42195,2,14392,34,0,2.9318371317398553,3.997777777777778 +25,19.70114517,1,1,3,1,42195,3,17940,42,1,2.3520066889632107,4.983333333333333 +35,24.34137344,0,2,3,1,42195,3,12055,30,0,3.5002073828287017,3.348611111111111 +23,23.27775955,1,2,2,1,42195,3,15660,18,0,2.6944444444444446,4.35 +30,24.48979568,0,2,2,1,42195,2,16110,45,0,2.6191806331471135,4.475 +44,24.23761749,0,2,3,1,42195,2,12289,63,1,3.433558466921637,3.413611111111111 +34,21.24975014,0,2,3,1,42195,3,12602,32,0,3.3482780511029997,3.5005555555555556 +41,25.20478821,0,2,1,1,42195,2,13500,20,1,3.1255555555555556,3.75 diff --git a/materials/R/worksheet_regression1/img/k-nn.jpeg b/materials/R/worksheet_regression1/img/k-nn.jpeg new file mode 100644 index 0000000..ff6acb3 Binary files /dev/null and b/materials/R/worksheet_regression1/img/k-nn.jpeg differ diff --git a/materials/R/worksheet_regression1/img/k-nn_RMSE.jpeg b/materials/R/worksheet_regression1/img/k-nn_RMSE.jpeg new file mode 100644 index 0000000..306d7ba Binary files /dev/null and b/materials/R/worksheet_regression1/img/k-nn_RMSE.jpeg differ diff --git 
a/materials/R/worksheet_regression1/worksheet_regression1.ipynb b/materials/R/worksheet_regression1/worksheet_regression1.ipynb new file mode 100644 index 0000000..56a9d95 --- /dev/null +++ b/materials/R/worksheet_regression1/worksheet_regression1.ipynb @@ -0,0 +1,1676 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d4829b728b7113c57a089bba81d0b9ab", + "grade": false, + "grade_id": "cell-6093d584be2ad9ed", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "# Worksheet: Regression I: K-nearest neighbors\n", + "\n", + "This worksheet covers the [Regression I: K-nearest neighbors](https://datasciencebook.ca/regression1.html) chapter of the online textbook, which also lists the learning objectives for this worksheet. You should read the textbook chapter before attempting this worksheet. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "68e6cf3045316eb49bd79a27a9b60f9c", + "grade": false, + "grade_id": "cell-53085d5554f9dcc8", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "### Run this cell before continuing.\n", + "library(tidyverse)\n", + "library(repr)\n", + "library(tidymodels)\n", + "options(repr.matrix.max.rows = 6)\n", + "source('cleanup.R')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "004a90536551b93c7af8a8b5d3e92957", + "grade": false, + "grade_id": "cell-8e40e06455324a19", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 0.0** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "To predict a value of $Y$ for a new observation using $k$-nn **regression**, we identify the $k$-nearest neighbours and then:\n", + "\n", + "A. Assign it the median of the $k$-nearest neighbours as the predicted value\n", + "\n", + "B. Assign it the mean of the $k$-nearest neighbours as the predicted value\n", + "\n", + "C. Assign it the mode of the $k$-nearest neighbours as the predicted value\n", + "\n", + "D. Assign it the majority vote of the $k$-nearest neighbours as the predicted value\n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.0`. Make sure your answer is an uppercase letter and is surrounded by quotation marks (e.g. `\"F\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b2a94d99e15ed916391daed3643fe923", + "grade": false, + "grade_id": "cell-108236d99368cc44", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b73094b84d34433c99720e08ae57ca22", + "grade": true, + "grade_id": "cell-7d5938e33a38edeb", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.0 is not character\"= setequal(digest(paste(toString(class(answer0.0)), \"687d1\")), \"b0606851f8ab1403affe70dae88277a1\"))\n", + "stopifnot(\"length of answer0.0 is not correct\"= setequal(digest(paste(toString(length(answer0.0)), \"687d1\")), \"8be1aa3b66de219b03427ea3a762d5f1\"))\n", + "stopifnot(\"value of answer0.0 is not correct\"= setequal(digest(paste(toString(tolower(answer0.0)), 
\"687d1\")), \"29c90bbc32809ae085b6d948b602ab03\"))\n", + "stopifnot(\"letters in string value of answer0.0 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.0), \"687d1\")), \"7ef26a441a8f13b24e927d8758ee9942\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "95e3437a546e5c50fc1f72c94db602de", + "grade": false, + "grade_id": "cell-73d7913554df0781", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### RMSPE Definition\n", + "\n", + "**Question 0.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "What does RMSPE stand for?\n", + "\n", + "\n", + "A. root mean squared prediction error\n", + "\n", + "B. root mean squared percentage error \n", + "\n", + "C. root mean squared performance error \n", + "\n", + "D. root mean squared preference error \n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.1`. Make sure you put quotation marks around the letter and pay attention to case.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f164912efe33cd5ed2060a318346e2a6", + "grade": false, + "grade_id": "cell-81be027da315cb3f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b06b6e1dee0dd6c9df975345e10496e8", + "grade": true, + "grade_id": "cell-db93265fd197f97f", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.1 is not character\"= setequal(digest(paste(toString(class(answer0.1)), \"8c44c\")), \"2afe90074fad3ec3878a60a49c4f380e\"))\n", + "stopifnot(\"length of answer0.1 is not correct\"= setequal(digest(paste(toString(length(answer0.1)), \"8c44c\")), \"5ef1b720a52eb9027ba23705d33bbf41\"))\n", + "stopifnot(\"value of answer0.1 is not correct\"= setequal(digest(paste(toString(tolower(answer0.1)), \"8c44c\")), \"85f9cc5e75fbe01d797bb60a41779cbd\"))\n", + "stopifnot(\"letters in string value of answer0.1 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.1), \"8c44c\")), 
\"ab7de5459f5be145c6006d1c73ee311e\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a5e9f226b02857ae7e1850945c63fd68", + "grade": false, + "grade_id": "cell-b0f4fa2237ef0429", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 0.2** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Of those shown below, which is the correct formula for RMSPE?\n", + "\n", + "\n", + "A. $RMSPE = \\sqrt{\\frac{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y}_i)^2}{1 - n}}$\n", + "\n", + "B. $RMSPE = \\sqrt{\\frac{1}{n - 1}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y}_i)^2}$\n", + "\n", + "C. $RMSPE = \\sqrt{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y}_i)^2}$\n", + "\n", + "D. $RMSPE = \\sqrt{\\frac{1}{n}\\sum\\limits_{i=1}^{n}(y_i - \\hat{y}_i)}$ \n", + "\n", + "*Save the letter of the answer you think is correct to a variable named `answer0.2`. Make sure you put quotation marks around the letter and pay attention to case.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0517b55812bcad016fd4e921018fc075", + "grade": false, + "grade_id": "cell-93c565d6974de38f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c27234f45a3a9cccef78e692f8cafe3d", + "grade": true, + "grade_id": "cell-b1e66491132c1e30", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer0.2 is not character\"= setequal(digest(paste(toString(class(answer0.2)), \"9bea8\")), \"3fdc4aab2072a9cfa24a7f38521b6a97\"))\n", + "stopifnot(\"length of answer0.2 is not correct\"= setequal(digest(paste(toString(length(answer0.2)), \"9bea8\")), \"c13695fa7859703bd79c245a30e9062b\"))\n", + "stopifnot(\"value of answer0.2 is not correct\"= setequal(digest(paste(toString(tolower(answer0.2)), \"9bea8\")), 
\"0404480a667292e5fc0a7fa025a8ec1b\"))\n", + "stopifnot(\"letters in string value of answer0.2 are correct but case is not correct\"= setequal(digest(paste(toString(answer0.2), \"9bea8\")), \"53ddfcba7833c00fc9fcc981a95a8ee2\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "654988da1cfd73473caa7dccd2e3bbc3", + "grade": false, + "grade_id": "cell-ded5f8fa4e621b81", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 0.3**\n", + "
{points: 1}\n", + "\n", + "The plot below is a very simple $k$-nn regression example, where the black dots are the data observations and the blue line shows the predictions from a $k$-nn regression model created from this data where $k=2$.\n", + "\n", + "Using the formula for RMSE (given in the reading), and the graph below, by hand (pen and paper or use R as a calculator) calculate RMSE for this model. Use **one decimal place of precision** when inputting the heights of the black dots and blue line. Save your answer to a variable named `answer0.3`. \n", + "\n", + "Notes:\n", + " - RMSE (Root Mean Squared Error) evaluates model performance on **training data**. \n", + " - RMSPE (Root Mean Squared Prediction Error) measures performance on **testing or validation data**.\n", + " - The predicted value when x = 1 is 1.3 (it's a bit hard to tell from the figure!) \n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7fbf2d7959474537e5e864b1349839b1", + "grade": false, + "grade_id": "cell-8dc7cc6f35de80cb", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer0.3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d7d52ee25b3e5201424ee91dfee3295f", + "grade": true, + "grade_id": "cell-d90b3e7b890ce86b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer0.3, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer0.3, 2))), \"2645c\")), \"890f753574357a14c6dc46b6fde1d4a1\"))\n", + "stopifnot(\"value of round(answer0.3, 2) is not correct (rounded to 2 decimal 
places)\"= setequal(digest(paste(toString(round(round(answer0.3, 2), 2)), \"2645c\")), \"584909b564382325b5e2e51f497a8f06\"))\n", + "stopifnot(\"length of round(answer0.3, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer0.3, 2))), \"2645c\")), \"654f47c4e06ab41e08e86365cf3674de\"))\n", + "stopifnot(\"values of round(answer0.3, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer0.3, 2), 2))), \"2645c\")), \"584909b564382325b5e2e51f497a8f06\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "c94e4bc2ed7206d48d7655fae0781008", + "grade": false, + "grade_id": "cell-e612110169987c7b", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## Marathon Training\n", + "\n", + "\n", + "\n", + "Source: https://media.giphy.com/media/nUN6InE2CodRm/giphy.gif\n", + "\n", + "What predicts which athletes will perform better than others? Specifically, we are interested in marathon runners, and in how the maximum distance ran per week (in miles) during race training predicts the time it takes a runner to finish the race. For this, we will be looking at the `marathon.csv` file in the `data/` folder." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9ea30c1434a67e3876226cd543d43d4c", + "grade": false, + "grade_id": "cell-16d7d432c49c9e53", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 1.0** \n", + "
{points: 1}\n", + "\n", + "Load the data and assign it to an object called `marathon`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e808fb877b552ca26971fdb18432c347", + "grade": false, + "grade_id": "cell-5376dc2c39983258", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f158322eb711309083b0018fbff50ea4", + "grade": true, + "grade_id": "cell-60554f6f6a56a16c", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon)), \"5ad7f\")), \"5dc9c7456ffe76eefefacaf68de81da5\"))\n", + "stopifnot(\"dimensions of marathon are not correct\"= setequal(digest(paste(toString(dim(marathon)), \"5ad7f\")), \"6d38bc2129d410a7271441253593089b\"))\n", + "stopifnot(\"column names of marathon are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon))), \"5ad7f\")), \"caee9f419eb341232e7caf99aae02dbb\"))\n", + "stopifnot(\"types of columns in marathon are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon, class)))), \"5ad7f\")), \"7f418de7bf624da22d05cf8350c89fb9\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.numeric))) sort(round(sapply(marathon[, sapply(marathon, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"5ad7f\")), \"7bde6e3e524402788cd80a3dbcaefc36\"))\n", + "stopifnot(\"values in one or more character columns in 
marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.character))) sum(sapply(marathon[sapply(marathon, is.character)], function(x) length(unique(x)))) else 0), \"5ad7f\")), \"11e84deb8b0074672fa0750b8d32c822\"))\n", + "stopifnot(\"values in one or more factor columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.factor))) sum(sapply(marathon[, sapply(marathon, is.factor)], function(col) length(unique(col)))) else 0), \"5ad7f\")), \"11e84deb8b0074672fa0750b8d32c822\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "19151cf672ee9d30f6c913c2c409d37d", + "grade": false, + "grade_id": "cell-26802c14f46a4d8e", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 2.0**\n", + "
{points: 1}\n", + "\n", + "We want to predict race time (in hours) (`time_hrs`) given a particular value of maximum distance ran per week (in miles) during race training (`max`). Let's take a subset of size 50 individuals of our marathon data and assign it to an object called `marathon_50`. With this subset, plot a scatterplot to assess the relationship between these two variables. Put `time_hrs` on the y-axis and `max` on the x-axis. **Assign this plot to an object called `answer2`.** Discuss, with a classmate, the relationship between race time and maximum distance ran per week during training based on the scatterplot you create below. \n", + "\n", + "*Hint: To take a subset of your data you can use the `slice_sample()` function*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fae00bce988bfcb06ccbe4b969adc789", + "grade": false, + "grade_id": "cell-6fcfc6ca32883f4d", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "set.seed(2000) ### DO NOT CHANGE\n", + "\n", + "#... <- ... 
|>\n", + "# slice_sample(n = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "56be3f23b8abedaf034cf4bde6251ad2", + "grade": true, + "grade_id": "cell-78ac50fb41cd9a86", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_50 should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_50)), \"9e3b0\")), \"f648a715ec3a68a64053d2106e756ee9\"))\n", + "stopifnot(\"dimensions of marathon_50 are not correct\"= setequal(digest(paste(toString(dim(marathon_50)), \"9e3b0\")), \"d5867fbcf32b27e63a31d963e2778026\"))\n", + "stopifnot(\"column names of marathon_50 are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_50))), \"9e3b0\")), \"8cbb07f1cf13637e1d3f1c8128fe5261\"))\n", + "stopifnot(\"types of columns in marathon_50 are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_50, class)))), \"9e3b0\")), \"8805ddc99e3eba80b2db048da1b00656\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_50 are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_50, is.numeric))) sort(round(sapply(marathon_50[, sapply(marathon_50, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e3b0\")), \"8643938e5e8f4fd61198304546a1dcc6\"))\n", + "stopifnot(\"values in one or more character columns in marathon_50 are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_50, is.character))) sum(sapply(marathon_50[sapply(marathon_50, is.character)], function(x) length(unique(x)))) else 0), \"9e3b0\")), \"50aa503f2ae3e0cb19f0e21034a1e821\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_50 are not correct\"= 
setequal(digest(paste(toString(if (any(sapply(marathon_50, is.factor))) sum(sapply(marathon_50[, sapply(marathon_50, is.factor)], function(col) length(unique(col)))) else 0), \"9e3b0\")), \"50aa503f2ae3e0cb19f0e21034a1e821\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(answer2$layers)), function(i) {c(class(answer2$layers[[i]]$geom))[1]})), \"9e3b1\")), \"fe9a3330961d28c5ecdb6766ba3592d8\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(answer2$layers)), function(i) {rlang::get_expr(c(answer2$layers[[i]]$mapping, answer2$mapping)$x)}), as.character))), \"9e3b1\")), \"697608c06e95946e39213ecc7ba8022b\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(answer2$layers)), function(i) {rlang::get_expr(c(answer2$layers[[i]]$mapping, answer2$mapping)$y)}), as.character))), \"9e3b1\")), \"39a0150a1a3ee6b63305919920a11d21\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$x)!= answer2$labels$x), \"9e3b1\")), \"882d81a5500cdab7c2502ead198340c1\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$y)!= answer2$labels$y), \"9e3b1\")), \"882d81a5500cdab7c2502ead198340c1\"))\n", + "stopifnot(\"incorrect colour variable in answer2, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$colour)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"incorrect shape variable in answer2, specify a correct one if required\"= 
setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$shape)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"the colour label in answer2 is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$colour) != answer2$labels$colour), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"the shape label in answer2 is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(answer2$layers[[1]]$mapping, answer2$mapping)$shape) != answer2$labels$shape), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"fill variable in answer2 is not correct\"= setequal(digest(paste(toString(quo_name(answer2$mapping$fill)), \"9e3b1\")), \"2697b9782ee1dcc08fd59f501a0d2b1b\"))\n", + "stopifnot(\"fill label in answer2 is not informative\"= setequal(digest(paste(toString((quo_name(answer2$mapping$fill) != answer2$labels$fill)), \"9e3b1\")), \"5094e6199ef0447e1850cc341c9594ea\"))\n", + "stopifnot(\"position argument in answer2 is not correct\"= setequal(digest(paste(toString(class(answer2$layers[[1]]$position)[1]), \"9e3b1\")), \"62aeaa372676e7b727922d989fbb01ef\"))\n", + "\n", + "stopifnot(\"answer2$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(answer2$data)), \"9e3b2\")), \"d56807b5efb6d39a647f5f9f5d214e64\"))\n", + "stopifnot(\"dimensions of answer2$data are not correct\"= setequal(digest(paste(toString(dim(answer2$data)), \"9e3b2\")), \"9c0858ff2b06e5cd35f0cd2479aed7b8\"))\n", + "stopifnot(\"column names of answer2$data are not correct\"= setequal(digest(paste(toString(sort(colnames(answer2$data))), \"9e3b2\")), \"dd976f74510f1315e62f5f989f301774\"))\n", + "stopifnot(\"types of columns in answer2$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(answer2$data, class)))), \"9e3b2\")), 
\"ade7b8d4b09aaa303ad5f3f0221afa86\"))\n", + "stopifnot(\"values in one or more numerical columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.numeric))) sort(round(sapply(answer2$data[, sapply(answer2$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e3b2\")), \"60f21159c4706fd7d4b459d51a844e99\"))\n", + "stopifnot(\"values in one or more character columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.character))) sum(sapply(answer2$data[sapply(answer2$data, is.character)], function(x) length(unique(x)))) else 0), \"9e3b2\")), \"7850fd9f1acc1d6d1335ab1e14118c5b\"))\n", + "stopifnot(\"values in one or more factor columns in answer2$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(answer2$data, is.factor))) sum(sapply(answer2$data[, sapply(answer2$data, is.factor)], function(col) length(unique(col)))) else 0), \"9e3b2\")), \"7850fd9f1acc1d6d1335ab1e14118c5b\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "34f89317595dbfcb55227675d68dc3a2", + "grade": false, + "grade_id": "cell-cbe9f9a5c4b7c264", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 3.0**\n", + "
{points: 1}\n", + "\n", + "Suppose we want to predict the race time for someone who ran a maximum distance of 100 miles per week during training. In the plot below we can see that no one has run a maximum distance of 100 miles per week. But, if we are interested in prediction, how can we predict with this data? We can use $k$-nn regression! To do this we get the $Y$ values (target/response variable) of the nearest $k$ values and then take their average and use that as the prediction. \n", + "\n", + "For this question predict the race time based on the 4 closest neighbors to the 100 miles per week during training.\n", + "\n", + "*Fill in the scaffolding below and assign your answer to an object named `answer3`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "75efaaf8fe04ae9b8a48d555d95b030b", + "grade": false, + "grade_id": "cell-51d4b16bee915d31", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# run this cell to see a visualization of the 4 nearest neighbours (each orange guide segment is drawn once with annotate())\n", + "options(repr.plot.height = 6, repr.plot.width = 7)\n", + "marathon_50 |>\n", + " ggplot(aes(x = max, y = time_hrs)) + \n", + " geom_point(color = 'dodgerblue', alpha = 0.4) +\n", + " geom_vline(xintercept = 100, linetype = \"dotted\") +\n", + " xlab(\"Maximum Distance Ran per \\n Week During Training (mi)\") +\n", + " ylab(\"Race Time (hours)\") + \n", + " annotate(\"segment\", x = 100, y = 2.56, xend = 107, yend = 2.56, colour = \"orange\") +\n", + " annotate(\"segment\", x = 100, y = 2.65, xend = 90, yend = 2.65, colour = \"orange\") +\n", + " annotate(\"segment\", x = 100, y = 2.99, xend = 86, yend = 2.99, colour = \"orange\") +\n", + " annotate(\"segment\", x = 100, y = 3.05, xend = 82, yend = 3.05, colour = \"orange\") +\n", + " theme(text = element_text(size = 20))" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4babff4645a19226f94b42c479b7645c", + "grade": false, + "grade_id": "cell-a8d1b15c312d9c3d", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "#... <- ... |> \n", + "# mutate(diff = abs(100 - ...)) |> \n", + "# ...(diff, ...) |> \n", + "# summarise(predicted = ...(...)) |>\n", + "# pull()\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "40efca56d6817e8226aadabbff223a30", + "grade": true, + "grade_id": "cell-af7afd4794001c7b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer3, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer3, 2))), \"6a21c\")), \"c9d2cea1a4dac1d859d0730c044d41eb\"))\n", + "stopifnot(\"value of round(answer3, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(answer3, 2), 2)), \"6a21c\")), \"08e6448336d1a3eee509e49a52478ab3\"))\n", + "stopifnot(\"length of round(answer3, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer3, 2))), \"6a21c\")), \"f1a7515d3d2d2fa10482af081e950276\"))\n", + "stopifnot(\"values of round(answer3, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer3, 2), 2))), \"6a21c\")), \"08e6448336d1a3eee509e49a52478ab3\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b1f9aa206f133969fcff7bf2009bbd0e", + "grade": false, + "grade_id": "cell-0c2107e7f52efb3d", + "locked": true, + 
"schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 4.0**\n", + "
{points: 1}\n", + "\n", + "For this question, let's instead predict the race time based on the 2 closest neighbors to the 100 miles per week during training.\n", + "\n", + "*Assign your answer to an object named `answer4`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3371f7bb2081a1f1eb48c702eebd05b6", + "grade": false, + "grade_id": "cell-44994bbd299ff0d0", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "cf11ee56927a5f67cce41fab07df1be4", + "grade": true, + "grade_id": "cell-690e322810064165", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer4, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer4, 2))), \"9a73b\")), \"a47e4de9e1b3b13d1772b79fd8689fa1\"))\n", + "stopifnot(\"value of round(answer4, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(answer4, 2), 2)), \"9a73b\")), \"26bd1aeeaa56c6025f423f5b2763517d\"))\n", + "stopifnot(\"length of round(answer4, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer4, 2))), \"9a73b\")), \"f3f027e877cfe92c5d8d7ec45f5ba816\"))\n", + "stopifnot(\"values of round(answer4, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer4, 2), 2))), \"9a73b\")), \"26bd1aeeaa56c6025f423f5b2763517d\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + 
"checksum": "a7b5791bb4f841adb405ff5f20f2fc83", + "grade": false, + "grade_id": "cell-3311ba44befa3767", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 5.0** Multiple Choice:\n", + "
{points: 1}\n", + "\n", + "So far you have calculated the $k$ nearest neighbors predictions manually based on $k$'s we have told you to use. However, last week we learned how to use a better method to choose the best $k$ for classification. \n", + "\n", + "Based on what you learned last week and what you have learned about $k$-nn regression so far this week, which method would you use to choose the $k$ (in the situation where we don't tell you which $k$ to use)?\n", + "\n", + "- A) Choose the $k$ that excludes most outliers\n", + "- B) Choose the $k$ with the lowest training error\n", + "- C) Choose the $k$ with the lowest cross-validation error\n", + "- D) Choose the $k$ that includes the most data points\n", + "- E) Choose the $k$ with the lowest testing error\n", + "\n", + "*Assign your answer to an object called `answer5`. Make sure your answer is an uppercase letter and is surrounded by quotation marks (e.g. `\"F\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fe8b9cbbbd4aa881d0e9ab40c0a0b96e", + "grade": false, + "grade_id": "cell-01b498d6b1415bf5", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "58622171f4b7eebca91c18d83f42b6c2", + "grade": true, + "grade_id": "cell-7cbe965dbdb7228d", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer5 is not character\"= setequal(digest(paste(toString(class(answer5)), \"37693\")), \"fc6c3130145f0fb80210523789d385c4\"))\n", + "stopifnot(\"length of answer5 is not correct\"= 
setequal(digest(paste(toString(length(answer5)), \"37693\")), \"2b4eb9a0e93cd3a15acd636e0a9e8a04\"))\n", + "stopifnot(\"value of answer5 is not correct\"= setequal(digest(paste(toString(tolower(answer5)), \"37693\")), \"e83660aa1020beda884cddb739a36f9e\"))\n", + "stopifnot(\"letters in string value of answer5 are correct but case is not correct\"= setequal(digest(paste(toString(answer5), \"37693\")), \"5c12e05e798903dd9637c879b3219bb2\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1ac1a0e15ea3ffa70b2260b0aa682b40", + "grade": false, + "grade_id": "cell-b3b34499fe540e49", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 6.0**\n", + "
{points: 1}\n", + "\n", + "We have just seen how to perform k-nn regression manually, now we will apply it to the whole dataset using the `tidymodels` package. To do so, we will first need to create the training and testing datasets. Split the data using *75%* of the `marathon` data as your training set and set `time_hrs` as the `strata` argument. Store this data into an object called `marathon_split`. \n", + "\n", + "Then, use the appropriate `training` and `testing` functions to create your training set which you will call `marathon_training` and your testing set which you will call `marathon_testing`. Remember we won't touch the test dataset until the end. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f1bfffda9ee38c9d79c21ae78613aac4", + "grade": false, + "grade_id": "cell-8257a9338314019e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2000) ### DO NOT CHANGE\n", + "\n", + "#... <- initial_split(..., prop = ..., strata = ...)\n", + "#... <- training(...)\n", + "#... 
<- testing(...)\n", + "\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b98ad482754c638881e3c0db12763b9e", + "grade": true, + "grade_id": "cell-381c8aeacf15fd35", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of 'rsplit' %in% class(marathon_split) is not logical\"= setequal(digest(paste(toString(class('rsplit' %in% class(marathon_split))), \"995eb\")), \"c37a6a3d32ece56dbf2983b316d56f96\"))\n", + "stopifnot(\"logical value of 'rsplit' %in% class(marathon_split) is not correct\"= setequal(digest(paste(toString('rsplit' %in% class(marathon_split)), \"995eb\")), \"76c52a1e5ab0897e6e3a3035b2593922\"))\n", + "\n", + "stopifnot(\"marathon_training should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_training)), \"995ec\")), \"501a47a75b2b457beff8e2ac89a8324c\"))\n", + "stopifnot(\"dimensions of marathon_training are not correct\"= setequal(digest(paste(toString(dim(marathon_training)), \"995ec\")), \"ad2869e2d34a6446cea1f65866f28007\"))\n", + "stopifnot(\"column names of marathon_training are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_training))), \"995ec\")), \"63ef62bbcc5de6b68df5d6cb8caf22f4\"))\n", + "stopifnot(\"types of columns in marathon_training are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_training, class)))), \"995ec\")), \"a5ba97618641aaa7581f575a22d28ebb\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.numeric))) sort(round(sapply(marathon_training[, sapply(marathon_training, is.numeric)], sum, na.rm = TRUE), 2)) else 
0), \"995ec\")), \"ed3dc19b532630b6bee7218c98fe8eaa\"))\n", + "stopifnot(\"values in one or more character columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.character))) sum(sapply(marathon_training[sapply(marathon_training, is.character)], function(x) length(unique(x)))) else 0), \"995ec\")), \"63f3d6429054bdfc33cca12ddf1b6add\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.factor))) sum(sapply(marathon_training[, sapply(marathon_training, is.factor)], function(col) length(unique(col)))) else 0), \"995ec\")), \"63f3d6429054bdfc33cca12ddf1b6add\"))\n", + "\n", + "stopifnot(\"marathon_testing should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_testing)), \"995ed\")), \"368652787cd859898436316bcd337f05\"))\n", + "stopifnot(\"dimensions of marathon_testing are not correct\"= setequal(digest(paste(toString(dim(marathon_testing)), \"995ed\")), \"1de930989154e3098eadae58a71a200c\"))\n", + "stopifnot(\"column names of marathon_testing are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_testing))), \"995ed\")), \"3219387bce679fc5b3707d5e9f8a69aa\"))\n", + "stopifnot(\"types of columns in marathon_testing are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_testing, class)))), \"995ed\")), \"82f32f5c3c8d2ba42979f74327ea0d94\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.numeric))) sort(round(sapply(marathon_testing[, sapply(marathon_testing, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"995ed\")), \"17a9b62568b0ae6c109f07cb1e94c751\"))\n", + "stopifnot(\"values in one or more character columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if 
(any(sapply(marathon_testing, is.character))) sum(sapply(marathon_testing[sapply(marathon_testing, is.character)], function(x) length(unique(x)))) else 0), \"995ed\")), \"da4b3b1d7a3eb9fbf3c9b437f2cb2306\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.factor))) sum(sapply(marathon_testing[, sapply(marathon_testing, is.factor)], function(col) length(unique(col)))) else 0), \"995ed\")), \"da4b3b1d7a3eb9fbf3c9b437f2cb2306\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f592eb9ca03dafc6d078406ffa752028", + "grade": false, + "grade_id": "cell-0c01c0b6cf4d8e91", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 7.0**\n", + "
{points: 1}\n", + "\n", + "Next, we’ll use cross-validation on our **training data** to choose $k$. In $k$-nn classification, we used accuracy to see how well our predictions matched the true labels. In the context of $k$-nn *regression*, we will use RMSPE instead. Interpreting the RMSPE value can be tricky, but generally speaking, if the prediction values are very close to the true values, the RMSPE will be small. Conversely, if the prediction values are *not* very close to the true values, the RMSPE will be quite large. \n", + "\n", + "Let's perform cross-validation and choose the optimal $k$. First, create a model specification for $k$-nn. We are still using the $k$-nearest neighbours algorithm, and so we will still use the same package for the model engine as we did in classification (`\"kknn\"`). As usual, specify that we want to use the *straight-line distance*. However, since this will be a regression problem, we will use `set_mode(\"regression\")` in the model specification. Store your model specification in an object called `marathon_spec`. \n", + "\n", + "Moreover, create a recipe to preprocess our data. Store your recipe in an object called `marathon_recipe`. The recipe should specify that the response variable is race time (hrs) and the predictor is maximum distance ran per week during training. As the scaffolding below indicates, the recipe should also include scaling and centering steps (`step_scale` and `step_center`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "31868ce196b9c3e528132deaef193f83", + "grade": false, + "grade_id": "cell-766f1e094dd32efc", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(1234) #DO NOT REMOVE\n", + "\n", + "#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |> \n", + "# set_engine(...) |>\n", + "# set_mode(...) \n", + "\n", + "#... <- recipe(... ~ ..., data = ...) |>\n", + "# step_scale(...) 
|>\n", + "# step_center(...)\n", + "# \n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_recipe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a48db7525cb8d5d744a96a5e697b7e9c", + "grade": true, + "grade_id": "cell-289bc7a299c4f482", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(marathon_spec)), \"5547\")), \"8ab83844edd34a74021cc1e79fa8cd3e\"))\n", + "stopifnot(\"model specification in marathon_spec is not correct\"= setequal(digest(paste(toString(marathon_spec$mode), \"5547\")), \"b1f1ba983a6682171a6e25213f46a7ec\"))\n", + "stopifnot(\"computational engine in marathon_spec is not correct\"= setequal(digest(paste(toString(marathon_spec$engine), \"5547\")), \"ca33af34f570b911430bce17b5d274b4\"))\n", + "stopifnot(\"weight function in marathon_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_spec$args$weight_func)), \"5547\")), \"6a7d1a8c96c075f8ca2b992a6e40a384\"))\n", + "stopifnot(\"number of neighbours in marathon_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_spec$args$neighbors)), \"5547\")), \"7367907047c067710c11a10603159af5\"))\n", + "\n", + "stopifnot(\"marathon_recipe should be a recipe\"= setequal(digest(paste(toString('recipe' %in% class(marathon_recipe)), \"5548\")), \"dafc3ea33a8e76c60aa970ef5a81ceca\"))\n", + "stopifnot(\"response variable of marathon_recipe is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_recipe$var_info, role == 'outcome')$variable)), \"5548\")), \"6161ba47763d24154550d947a5e1ad8d\"))\n", + "stopifnot(\"predictor variable(s) of marathon_recipe are not correct\"= 
setequal(digest(paste(toString(sort(filter(marathon_recipe$var_info, role == 'predictor')$variable)), \"5548\")), \"f9faad8a2551549f2fad9412027cc626\"))\n", + "stopifnot(\"marathon_recipe does not contain the correct data, might need to be standardized\"= setequal(digest(paste(toString(round(sum(bake(prep(marathon_recipe), marathon_recipe$template) %>% select_if(is.numeric), na.rm = TRUE), 2)), \"5548\")), \"90fd9c3ee65ef9b8152e366cdf2e0feb\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b591bde125171c97afe36a1a6c5a527b", + "grade": false, + "grade_id": "cell-9bbc49f6977aa3fc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 7.1**\n", + "
{points: 1}\n", + "\n", + "Now, create the splits for cross-validation with *5 folds* using the `vfold_cv` function. Store your answer in an object called `marathon_vfold`. Make sure to set the `strata` argument.\n", + "\n", + "Then, use the `workflow` function to combine your model specification and recipe. Store your answer in an object called `marathon_workflow`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c551fd220ee0f022dd4d1aa49e0a74b9", + "grade": false, + "grade_id": "cell-7cf222d5ae9a2f89", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "set.seed(1234) # DO NOT REMOVE\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "71e14b2b9fcdc15f2d8cb79ad3b1d5a0", + "grade": true, + "grade_id": "cell-88d9f578265d6294", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_vfold should be a cross validation object\"= setequal(digest(paste(toString('vfold_cv' %in% class(marathon_vfold)), \"ab229\")), \"c3bddbd841824b3cf5b9b336372855ed\"))\n", + "stopifnot(\"number of folds is not correct\"= setequal(digest(paste(toString(length(marathon_vfold$id)), \"ab229\")), \"55c11bef663fb02f72bfcf7930692743\"))\n", + "stopifnot(\"data used is not correct\"= setequal(digest(paste(toString(dim(marathon_vfold)), \"ab229\")), \"c7bcbe7a4a4fbb68b454853bb1e50224\"))\n", + "stopifnot(\"training data is not used\"= setequal(digest(paste(toString(if (any(sapply(marathon_vfold$splits[[1]]$data, is.numeric))) 
{round(sapply(marathon_vfold$splits[[1]]$data[, sapply(marathon_vfold$splits[[1]]$data, is.numeric)], sum, na.rm = TRUE), 2)}), \"ab229\")), \"6b9c2e796cadf9494d1d87b4e1c00c59\"))\n", + "stopifnot(\"strata argument is not correct\"= setequal(digest(paste(toString(sapply(seq_along(marathon_vfold$splits), function(i) {round(sum(marathon_vfold$splits[[i]]$in_id), 2)})), \"ab229\")), \"b4f23b7d7a8ecce1e7d5a731d02797ce\"))\n", + "\n", + "stopifnot(\"marathon_workflow should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(marathon_workflow)), \"ab22a\")), \"8d7b4e8c4a7419dbffe358719d5d9bb9\"))\n", + "stopifnot(\"computational engine used in marathon_workflow is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$actions$model$spec$engine), \"ab22a\")), \"de5ef4970762f2d63df8ccc9ad6aaf1b\"))\n", + "stopifnot(\"model specification used in marathon_workflow is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$actions$model$spec$mode), \"ab22a\")), \"dcb0e951e2b9778ffc9d472a93e87e47\"))\n", + "stopifnot(\"marathon_workflow must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(marathon_workflow$trained), \"ab22a\")), \"95e32306c93c330c5d5720f91540b6bd\"))\n", + "stopifnot(\"predictor variable(s) of marathon_workflow are not correct\"= setequal(digest(paste(toString(sort(filter(marathon_workflow$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"ab22a\")), \"8bd9531610d94899e636e21a69c93095\"))\n", + "stopifnot(\"marathon_workflow does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(marathon_workflow$pre$mold$predictors[, sapply(marathon_workflow$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"did not fit marathon_workflow on the training dataset\"= 
setequal(digest(paste(toString(nrow(marathon_workflow$pre$mold$outcomes)), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"for classification/regression models, weight function is not correct\"= setequal(digest(paste(toString(quo_name(marathon_workflow$fit$actions$model$spec$args$weight_func)), \"ab22a\")), \"a84b6b776aa8868e397d0f7942c84d8e\"))\n", + "stopifnot(\"for classification/regression models, response variable of marathon_workflow is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_workflow$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"ab22a\")), \"802f4cdc88968d5e300ba52cf0b5310f\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(marathon_workflow$fit$actions$model$spec$args$neighbors)), \"ab22a\")), \"011cdad6fa1b1ee0c92e3621303b70ed\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(marathon_workflow$fit$fit$fit$cluster), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if (!is.null(marathon_workflow$fit$fit$fit$tot.withinss)) round(marathon_workflow$fit$fit$fit$tot.withinss, 2) else NULL), \"ab22a\")), \"1262d944405c553c4bb54ec1a09cb57f\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "06d081b570a555d6fdd7bddae615221f", + "grade": false, + "grade_id": "cell-7cd5193eecc27c30", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.0**\n", + "
{points: 1}\n", + "\n", + "If you haven't noticed by now, the major difference compared to other workflows from Chapters 6 and 7 is that we are running a *regression* rather than a *classification*. Specifying *regression* in the `set_mode` function essentially tells `tidymodels` that we need to use different metrics (RMSPE rather than accuracy) for tuning and evaluation. \n", + "\n", + "Now, let's use the RMSPE to find the best setting for $k$ from our workflow. Let's test the values $k = 1, 11, 21, 31, ..., 81$.\n", + "\n", + "First, create a tibble with a column called `neighbors` that contains the sequence of values. Remember that you should use the `seq` function to create the range of $k$s that goes *from* 1 *to* 81 *by jumps of* 10. Assign that tibble to an object called `gridvals`. \n", + "\n", + "Next, tune your workflow such that it tests all the values in `gridvals` and *resamples* using your cross-validation data set. Finally, collect the statistics from your model. Assign your answer to an object called `marathon_results`.\n", + "\n", + "*Note: For this question, do not apply any filters to the metrics. 
Print all metrics exactly as returned by the function.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "79b57741a3f0d43055e78fc0f333749c", + "grade": false, + "grade_id": "cell-8c7e7ded673d28ec", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2019) # DO NOT CHANGE\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "257bf5db6b9b79ff1520b860c8e3f888", + "grade": true, + "grade_id": "cell-29d608ebfdd11366", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"gridvals should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(gridvals)), \"9e09\")), \"e3785f29ad31e6528294ba45bd185310\"))\n", + "stopifnot(\"dimensions of gridvals are not correct\"= setequal(digest(paste(toString(dim(gridvals)), \"9e09\")), \"74028ed922d45a6a780d2f5306e7d24e\"))\n", + "stopifnot(\"column names of gridvals are not correct\"= setequal(digest(paste(toString(sort(colnames(gridvals))), \"9e09\")), \"14b2d11c9f26fcd55788479cedad5474\"))\n", + "stopifnot(\"types of columns in gridvals are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(gridvals, class)))), \"9e09\")), \"e36ee1654fef221fccb6f406dedeef5e\"))\n", + "stopifnot(\"values in one or more numerical columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.numeric))) sort(round(sapply(gridvals[, sapply(gridvals, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e09\")), \"143d0c8eb4893dcebe4b08f79c11636c\"))\n", + "stopifnot(\"values in one or more character 
columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.character))) sum(sapply(gridvals[sapply(gridvals, is.character)], function(x) length(unique(x)))) else 0), \"9e09\")), \"78cc6b154549e7981035d4e9982eee20\"))\n", + "stopifnot(\"values in one or more factor columns in gridvals are not correct\"= setequal(digest(paste(toString(if (any(sapply(gridvals, is.factor))) sum(sapply(gridvals[, sapply(gridvals, is.factor)], function(col) length(unique(col)))) else 0), \"9e09\")), \"78cc6b154549e7981035d4e9982eee20\"))\n", + "\n", + "stopifnot(\"marathon_results should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_results)), \"9e0a\")), \"df7374252434fb128d5f92ddd0f81b58\"))\n", + "stopifnot(\"dimensions of marathon_results are not correct\"= setequal(digest(paste(toString(dim(marathon_results)), \"9e0a\")), \"4b785364ac3b2b46c601276c8462a845\"))\n", + "stopifnot(\"column names of marathon_results are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_results))), \"9e0a\")), \"7855f48ba3ceba568928de2dcac36b0d\"))\n", + "stopifnot(\"types of columns in marathon_results are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_results, class)))), \"9e0a\")), \"5d450389a632649592270bf8862bd544\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.numeric))) sort(round(sapply(marathon_results[, sapply(marathon_results, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"9e0a\")), \"93540c0b281129461265ef8b0b6c48be\"))\n", + "stopifnot(\"values in one or more character columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.character))) sum(sapply(marathon_results[sapply(marathon_results, is.character)], function(x) length(unique(x)))) else 0), \"9e0a\")), 
\"ea80b90301e9ed819d58d7909bfeead0\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_results, is.factor))) sum(sapply(marathon_results[, sapply(marathon_results, is.factor)], function(col) length(unique(col)))) else 0), \"9e0a\")), \"f6143329c548deabbac24c3034b08ebf\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ffc7314876aa53957c34b549d51a8367", + "grade": false, + "grade_id": "cell-35eaea9c384c5f6c", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.1**\n", + "
{points: 1}\n", + "\n", + "Great! Now find the *minimum* RMSPE along with its associated metrics such as the mean and standard error, to help us find the number of neighbors that will serve as our best $k$ value. Your answer should simply be a tibble with one row. Assign your answer to an object called `marathon_min`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "99a6de5bf9d8d8c29eb0df4704b4e848", + "grade": false, + "grade_id": "cell-f9738d45da9e8a77", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2020) # DO NOT REMOVE\n", + "\n", + "#... <- marathon_results |>\n", + "# filter(.metric == ...) |>\n", + "# slice_min(..., ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_min" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "236379001bc83be21ff82da0403ca93a", + "grade": true, + "grade_id": "cell-a1e34e1fb62d79f5", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_min should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_min)), \"33a4\")), \"40b5e33777ec7eb065f043fa71f535a6\"))\n", + "stopifnot(\"dimensions of marathon_min are not correct\"= setequal(digest(paste(toString(dim(marathon_min)), \"33a4\")), \"38b61400a91856750685827d24948776\"))\n", + "stopifnot(\"column names of marathon_min are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_min))), \"33a4\")), \"e6b0259c3aaa121f0abdc46cafe6e2c2\"))\n", + "stopifnot(\"types of columns in marathon_min are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_min, class)))), 
\"33a4\")), \"991d6e89327c147fc4962bd227ea29ba\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.numeric))) sort(round(sapply(marathon_min[, sapply(marathon_min, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"33a4\")), \"0b8ad5b8e14a65e2bdb202f878d3aa33\"))\n", + "stopifnot(\"values in one or more character columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.character))) sum(sapply(marathon_min[sapply(marathon_min, is.character)], function(x) length(unique(x)))) else 0), \"33a4\")), \"066a3d400801f50d6cfbbb54e8a91f62\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_min are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_min, is.factor))) sum(sapply(marathon_min[, sapply(marathon_min, is.factor)], function(col) length(unique(col)))) else 0), \"33a4\")), \"d9bdf2992b7d868327011d545b756c8c\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "059df927a68d576994b89a53009dd594", + "grade": false, + "grade_id": "cell-28a414eb59f5ae20", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 8.2**\n", + "
{points: 1}\n", + "\n", + "To assess how well our model might do at predicting on unseen data, we will assess its RMSPE on the test data. To do this, we will first re-train our $k$-nn regression model on the entire training data set, using the $k$ value we obtained from **Question 8.1**. \n", + "\n", + "To start, pull the best `neighbors` value from `marathon_min` and store it in an object called `k_min`. \n", + "\n", + "Following that, we will repeat the workflow analysis again but with a brand new model specification with `k_min`. Remember, we are doing a regression analysis, so please select the appropriate mode. Store your new model specification in an object called `marathon_best_spec` and your new workflow analysis in an object called `marathon_best_fit`. You can reuse the existing `marathon_recipe` for this workflow, as we do not need to change it for this task.\n", + "\n", + "Then, we will use the `predict` function to make predictions on the test data, and use the `metrics` function again to compute a summary of the regression's quality. Store your answer in an object called `marathon_summary`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8d9c278960e85de2b5b718d977d715cd", + "grade": false, + "grade_id": "cell-a4de6046a0bd5f96", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(1234) # DO NOT REMOVE\n", + "\n", + "#... <- marathon_min |>\n", + "# pull(...)\n", + "\n", + "#... <- nearest_neighbor(weight_func = ..., neighbors = ...) |>\n", + "# set_engine(...) |>\n", + "# set_mode(...)\n", + "\n", + "#... <- workflow() |>\n", + "# add_recipe(...) |>\n", + "# add_model(...) |>\n", + "# fit(data = ...)\n", + "\n", + "#... <- marathon_best_fit |>\n", + "# predict(...) |>\n", + "# bind_cols(...) 
|>\n", + "# metrics(truth = ..., estimate = ...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9059e615b6cdd649371f38389a634ab3", + "grade": true, + "grade_id": "cell-94fab75dca459b65", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of k_min is not numeric\"= setequal(digest(paste(toString(class(k_min)), \"550f6\")), \"01c662532c057a22b2b9cb927ea3aa3f\"))\n", + "stopifnot(\"value of k_min is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(k_min, 2)), \"550f6\")), \"5b34b812d262ed0bf637355b4e5408ce\"))\n", + "stopifnot(\"length of k_min is not correct\"= setequal(digest(paste(toString(length(k_min)), \"550f6\")), \"f23949a15452933ed5d3171f2d85ddf5\"))\n", + "stopifnot(\"values of k_min are not correct\"= setequal(digest(paste(toString(sort(round(k_min, 2))), \"550f6\")), \"5b34b812d262ed0bf637355b4e5408ce\"))\n", + "\n", + "stopifnot(\"marathon_best_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(marathon_best_spec)), \"550f7\")), \"71cbb182aea5ad1ec22f3e1d3149f5a7\"))\n", + "stopifnot(\"model specification in marathon_best_spec is not correct\"= setequal(digest(paste(toString(marathon_best_spec$mode), \"550f7\")), \"ba973cd2e3bbdcb580ce21682cbbed34\"))\n", + "stopifnot(\"computational engine in marathon_best_spec is not correct\"= setequal(digest(paste(toString(marathon_best_spec$engine), \"550f7\")), \"03bd61c48336305c8fe6492179a01fa5\"))\n", + "stopifnot(\"weight function in marathon_best_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_spec$args$weight_func)), \"550f7\")), 
\"14ab6f06ac87b4cdfc2197cf474d4fe5\"))\n", + "stopifnot(\"number of neighbours in marathon_best_spec is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_spec$args$neighbors)), \"550f7\")), \"fec54ae3124ff36fc32e618112b69461\"))\n", + "\n", + "stopifnot(\"marathon_best_fit should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(marathon_best_fit)), \"550f8\")), \"c278370a8060d698d5d49b79c4d8b9f7\"))\n", + "stopifnot(\"computational engine used in marathon_best_fit is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$actions$model$spec$engine), \"550f8\")), \"9a4ca5e5b6ba68241af1ae159b63e8c6\"))\n", + "stopifnot(\"model specification used in marathon_best_fit is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$actions$model$spec$mode), \"550f8\")), \"9013407379761a62f44a4853cbf09d68\"))\n", + "stopifnot(\"marathon_best_fit must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(marathon_best_fit$trained), \"550f8\")), \"c278370a8060d698d5d49b79c4d8b9f7\"))\n", + "stopifnot(\"predictor variable(s) of marathon_best_fit are not correct\"= setequal(digest(paste(toString(sort(filter(marathon_best_fit$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"550f8\")), \"0bb1a7322ed0ac79cbb99ece1bed5fe8\"))\n", + "stopifnot(\"marathon_best_fit does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(marathon_best_fit$pre$mold$predictors[, sapply(marathon_best_fit$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"550f8\")), \"5996592f0d4539ed7cdde9924dd1c180\"))\n", + "stopifnot(\"did not fit marathon_best_fit on the training dataset\"= setequal(digest(paste(toString(nrow(marathon_best_fit$pre$mold$outcomes)), \"550f8\")), \"f77ac93555ebf5acd29489e116cfe1f3\"))\n", + "stopifnot(\"for classification/regression models, weight function is not 
correct\"= setequal(digest(paste(toString(quo_name(marathon_best_fit$fit$actions$model$spec$args$weight_func)), \"550f8\")), \"38d7480d57d211b785f6957842d1cc37\"))\n", + "stopifnot(\"for classification/regression models, response variable of marathon_best_fit is not correct\"= setequal(digest(paste(toString(sort(filter(marathon_best_fit$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"550f8\")), \"1b3320cc17789ad0a707b28c1833b4ba\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(marathon_best_fit$fit$actions$model$spec$args$neighbors)), \"550f8\")), \"6c7d30dbf17d893256cca52b8803fdbc\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(marathon_best_fit$fit$fit$fit$cluster), \"550f8\")), \"a3b5926535d0e0cb6c4b76c762df3a21\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if (!is.null(marathon_best_fit$fit$fit$fit$tot.withinss)) round(marathon_best_fit$fit$fit$fit$tot.withinss, 2) else NULL), \"550f8\")), \"a3b5926535d0e0cb6c4b76c762df3a21\"))\n", + "\n", + "stopifnot(\"marathon_summary should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_summary)), \"550f9\")), \"8510d8378e729a04fefcf81210f014ad\"))\n", + "stopifnot(\"dimensions of marathon_summary are not correct\"= setequal(digest(paste(toString(dim(marathon_summary)), \"550f9\")), \"08fa1b5bd8cded25763a7850d373ec09\"))\n", + "stopifnot(\"column names of marathon_summary are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_summary))), \"550f9\")), \"8d5530627575e8e898130cc2baea6b47\"))\n", + "stopifnot(\"types of columns in marathon_summary are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_summary, class)))), \"550f9\")), \"2b01203f0e2844f3ba994d132f86283f\"))\n", + "stopifnot(\"values in one 
or more numerical columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.numeric))) sort(round(sapply(marathon_summary[, sapply(marathon_summary, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"550f9\")), \"490760f22419c4660094e8f16d1c9718\"))\n", + "stopifnot(\"values in one or more character columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.character))) sum(sapply(marathon_summary[sapply(marathon_summary, is.character)], function(x) length(unique(x)))) else 0), \"550f9\")), \"0b83a45fc9152b6322604a7124e0f87d\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_summary are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_summary, is.factor))) sum(sapply(marathon_summary[, sapply(marathon_summary, is.factor)], function(col) length(unique(col)))) else 0), \"550f9\")), \"c67c2b0da350f0e27a9914f9e81220ab\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1feec2b034696ca26148ef224854254e", + "grade": false, + "grade_id": "cell-dffef3173aff9b72", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "What does this RMSPE score mean? RMSPE is measured in the units of the target/response variable, so it can sometimes be a bit hard to interpret. But in this case, we know that a typical marathon race time is somewhere between 3 - 5 hours. So this model allows us to predict a runner's race time up to about +/-0.6 of an hour, or +/- 36 minutes. This is not *fantastic*, but not *terrible* either. 
We can certainly use the model to determine roughly whether an athlete will have a bad, good, or excellent race time, but probably cannot reliably distinguish between athletes of a similar caliber.\n", + "\n", + "For now, let’s consider this approach to thinking about RMSPE from our testing data set: as long as it’s not significantly worse than the cross-validation RMSPE of our best model (**Question 8.1**), then we can say that we’re not doing too much worse on the test data than we did on the training data. In future courses on statistical/machine learning, you will learn more about how to interpret RMSPE from testing data and other ways to assess models. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "553011861ab39af59d82c7ff28c78d80", + "grade": false, + "grade_id": "cell-ed97bb769cc923e5", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 8.3** True or False:\n", + "
{points: 1}\n", + "\n", + "The RMSPE from our testing data set is *much worse* than the cross-validation RMSPE of our best model. \n", + "\n", + "*Assign your answer to an object named `answer8.3`. Make sure your answer is in lowercase and is surrounded by quotation marks (e.g. `\"true\"` or `\"false\"`).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "57ecd2f6ac40fa40cd040ffc75fc7ce5", + "grade": false, + "grade_id": "cell-24ccc13552b0fff1", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "099fe7f9c58ee09e8ce13f4795801881", + "grade": true, + "grade_id": "cell-f39ed1223c189ec6", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer8.3 is not character\"= setequal(digest(paste(toString(class(answer8.3)), \"aa898\")), \"fdfbb8dbf9e39738698be2e00f17a87d\"))\n", + "stopifnot(\"length of answer8.3 is not correct\"= setequal(digest(paste(toString(length(answer8.3)), \"aa898\")), \"a40dcc06a77707553cc5d3e1acdc02dd\"))\n", + "stopifnot(\"value of answer8.3 is not correct\"= setequal(digest(paste(toString(tolower(answer8.3)), \"aa898\")), \"3463dffe41b3e1babe842b2dfef22ebe\"))\n", + "stopifnot(\"letters in string value of answer8.3 are correct but case is not correct\"= setequal(digest(paste(toString(answer8.3), \"aa898\")), \"3463dffe41b3e1babe842b2dfef22ebe\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + 
"cell_type": "markdown", + "checksum": "35a07e04ba2aae150c37aad410e9c317", + "grade": false, + "grade_id": "cell-f527fba0cc1ca89d", + "locked": true, + "schema_version": 3, + "solution": false + } + }, + "source": [ + "**Question 9.0**\n", + "
{points: 1}\n", + "\n", + "Let's visualize what the relationship between `max` and `time_hrs` looks like with our best $k$ value to ultimately explore how the $k$ value affects $k$-nn regression.\n", + "\n", + "To do so, use the `predict` function on the workflow analysis that utilizes the best $k$ value (`marathon_best_fit`) to create predictions for the `marathon_training` data. Then, add the column of predictions to the `marathon_training` data frame using `bind_cols`. Name the resulting data frame `marathon_preds`.\n", + "\n", + "Next, create a scatterplot with the maximum distance run per week against the marathon time from `marathon_preds`. Assign your plot to an object called `marathon_plot`. **Plot the predictions as a blue line over the data points.** Remember the fundamentals of effective visualizations such as having a **title** and **human-readable axes**. \n", + "\n", + "*Note: use `geom_point` before `geom_line` when creating the plot to ensure tests pass!*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "dd315f90974d6f742929f677f91a1807", + "grade": false, + "grade_id": "cell-e623a65f902a7e98", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2019) # DO NOT CHANGE\n", + "\n", + "options(repr.plot.width = 7, repr.plot.height = 7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "742eba48e65439750fb6ae57132c6288", + "grade": true, + "grade_id": "cell-5eff9c974a058bdf", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of plot is not correct 
(if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(marathon_plot$layers)), function(i) {c(class(marathon_plot$layers[[i]]$geom))[1]})), \"b9743\")), \"d56026378896396ba628c5624640af83\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_plot$layers)), function(i) {rlang::get_expr(c(marathon_plot$layers[[i]]$mapping, marathon_plot$mapping)$x)}), as.character))), \"b9743\")), \"a7b5ce253cd34ddb441f3328ae587132\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_plot$layers)), function(i) {rlang::get_expr(c(marathon_plot$layers[[i]]$mapping, marathon_plot$mapping)$y)}), as.character))), \"b9743\")), \"df7f01f9cbc56804207c66b99869848c\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$x)!= marathon_plot$labels$x), \"b9743\")), \"626430ae9d9877f3617df01c7f947285\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$y)!= marathon_plot$labels$y), \"b9743\")), \"626430ae9d9877f3617df01c7f947285\"))\n", + "stopifnot(\"incorrect colour variable in marathon_plot, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$colour)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"incorrect shape variable in marathon_plot, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$shape)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + 
"stopifnot(\"the colour label in marathon_plot is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$colour) != marathon_plot$labels$colour), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"the shape label in marathon_plot is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_plot$layers[[1]]$mapping, marathon_plot$mapping)$colour) != marathon_plot$labels$shape), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"fill variable in marathon_plot is not correct\"= setequal(digest(paste(toString(quo_name(marathon_plot$mapping$fill)), \"b9743\")), \"030c9e71d2ec4fea75b9143c74f57384\"))\n", + "stopifnot(\"fill label in marathon_plot is not informative\"= setequal(digest(paste(toString((quo_name(marathon_plot$mapping$fill) != marathon_plot$labels$fill)), \"b9743\")), \"6e54f7c10e27f2dd167e178dadf2b08d\"))\n", + "stopifnot(\"position argument in marathon_plot is not correct\"= setequal(digest(paste(toString(class(marathon_plot$layers[[1]]$position)[1]), \"b9743\")), \"5080a849493eb59c49af507ee557edd2\"))\n", + "\n", + "stopifnot(\"marathon_plot$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_plot$data)), \"b9744\")), \"ff94263ebc353ae0e93a3610dae0c0c7\"))\n", + "stopifnot(\"dimensions of marathon_plot$data are not correct\"= setequal(digest(paste(toString(dim(marathon_plot$data)), \"b9744\")), \"bdb3587ec0317d8baf1dac70b56564a9\"))\n", + "stopifnot(\"column names of marathon_plot$data are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_plot$data))), \"b9744\")), \"f908eda84c0f11bb1c252c99eb5de321\"))\n", + "stopifnot(\"types of columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_plot$data, class)))), \"b9744\")), 
\"5f21a4c36da13ec62b20ea7ebd9c6380\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.numeric))) sort(round(sapply(marathon_plot$data[, sapply(marathon_plot$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"b9744\")), \"1e47fdb80dba2897519f48a743bcfcd7\"))\n", + "stopifnot(\"values in one or more character columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.character))) sum(sapply(marathon_plot$data[sapply(marathon_plot$data, is.character)], function(x) length(unique(x)))) else 0), \"b9744\")), \"d6d21b5fdf47ab635b546662cb873731\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_plot$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_plot$data, is.factor))) sum(sapply(marathon_plot$data[, sapply(marathon_plot$data, is.factor)], function(col) length(unique(col)))) else 0), \"b9744\")), \"d6d21b5fdf47ab635b546662cb873731\"))\n", + "\n", + "stopifnot(\"type of is.character(marathon_plot$labels$title) is not logical\"= setequal(digest(paste(toString(class(is.character(marathon_plot$labels$title))), \"b9745\")), \"5f2cdb5e0db0082e3ba2c51886d533f9\"))\n", + "stopifnot(\"logical value of is.character(marathon_plot$labels$title) is not correct\"= setequal(digest(paste(toString(is.character(marathon_plot$labels$title)), \"b9745\")), \"a2d537ac5f121c20197a0b3c01f19b6a\"))\n", + "\n", + "stopifnot(\"type of as.character(marathon_plot$layers[[2]]$aes_params) is not character\"= setequal(digest(paste(toString(class(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), \"2837cd9a472e7f8bf026f283bcdfeb4b\"))\n", + "stopifnot(\"length of as.character(marathon_plot$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(length(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), 
\"4721e09e2f46499eda0e022ab36a0863\"))\n", + "stopifnot(\"value of as.character(marathon_plot$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(tolower(as.character(marathon_plot$layers[[2]]$aes_params))), \"b9746\")), \"f748724dc18e96f9604fe136f112f1ca\"))\n", + "stopifnot(\"letters in string value of as.character(marathon_plot$layers[[2]]$aes_params) are correct but case is not correct\"= setequal(digest(paste(toString(as.character(marathon_plot$layers[[2]]$aes_params)), \"b9746\")), \"f748724dc18e96f9604fe136f112f1ca\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "source('cleanup.R')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/materials/R/worksheet_regression2/.ipynb_checkpoints/worksheet_regression2-checkpoint.ipynb b/materials/R/worksheet_regression2/.ipynb_checkpoints/worksheet_regression2-checkpoint.ipynb new file mode 100644 index 0000000..3f67068 --- /dev/null +++ b/materials/R/worksheet_regression2/.ipynb_checkpoints/worksheet_regression2-checkpoint.ipynb @@ -0,0 +1,1902 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "44d76b7a3bb4dfb5634051dc99ede3ff", + "grade": false, + "grade_id": "cell-fe148db84368f758", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "# Worksheet: Regression II: linear regression\n", + "\n", + "This worksheet covers the [Regression II: linear regression](https://datasciencebook.ca/regression2.html) chapter of the online textbook, which also lists 
the learning objectives for this worksheet. You should read the textbook chapter before attempting this worksheet. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "94aed1b06b55ea926d94a5370e98a804", + "grade": false, + "grade_id": "cell-9d267d6cbb575992", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "### Run this cell before continuing.\n", + "library(tidyverse)\n", + "library(repr)\n", + "library(tidymodels)\n", + "library(cowplot)\n", + "options(repr.matrix.max.rows = 6)\n", + "source('cleanup.R')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9f9b1c5d7e6c54f6b21d4f3a94bd4b02", + "grade": false, + "grade_id": "cell-5cd24ea6314eb5a1", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### Warm-up Questions\n", + "\n", + "Here are some warm-up questions on the topic of multiple regression to get you thinking before we jump into data analysis. The course readings should help you answer these.\n", + "\n", + "**Question 1.0** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In multivariate k-nn regression with one outcome/target variable and two predictor variables, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. A straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.0`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b68e9781d7d6de0dc3b6020e0032c8a2", + "grade": false, + "grade_id": "cell-c83bc93df7f00340", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "66ab381450430f7d6bcc24aa69a3ff6f", + "grade": true, + "grade_id": "cell-69d13db813c674a2", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.0 is not character\"= setequal(digest(paste(toString(class(answer1.0)), \"c1dca\")), \"0062368865d313b1d9a0758736981e96\"))\n", + "stopifnot(\"length of answer1.0 is not correct\"= setequal(digest(paste(toString(length(answer1.0)), \"c1dca\")), \"0b9103cc5a6d5362898cda5f7da0cd1f\"))\n", + "stopifnot(\"value of answer1.0 is not correct\"= setequal(digest(paste(toString(tolower(answer1.0)), \"c1dca\")), \"cfeaebac99f01978e387bb024cdc6d11\"))\n", + "stopifnot(\"letters in string value of answer1.0 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.0), \"c1dca\")), \"6bfc86aa847ff4157dd3ba4de5012220\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ce8c7ccf1a34341294b2fa56d53a7361", + "grade": false, + "grade_id": "cell-17b50854bfe8bed1", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In simple linear regression with one outcome/target variable and one predictor variable, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. A straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.1`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d34dc1682e6322aefc58306e7d4c1c93", + "grade": false, + "grade_id": "cell-4ba2e045fef50db4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ea9060b1e9142004aab4d87847eaee56", + "grade": true, + "grade_id": "cell-f7542e36e61cd131", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.1 is not character\"= setequal(digest(paste(toString(class(answer1.1)), \"9f35e\")), \"0f1e7877308258f0f4d1265b701d5e1e\"))\n", + "stopifnot(\"length of answer1.1 is not correct\"= setequal(digest(paste(toString(length(answer1.1)), \"9f35e\")), \"96556f84d0cfd3c5304b670e691a10af\"))\n", + "stopifnot(\"value of answer1.1 is not correct\"= setequal(digest(paste(toString(tolower(answer1.1)), \"9f35e\")), \"0afb28f4f1c05ad02549badb853886e1\"))\n", + "stopifnot(\"letters in string value of answer1.1 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.1), \"9f35e\")), \"5f72ad18f4b91281489a09abd242d8f6\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e23b8e9e1e97119c478f42967f22353b", + "grade": false, + "grade_id": "cell-4c7cb5e7fd4eb668", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.2** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In multiple linear regression with one outcome/target variable and two predictor variables, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. A straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.2`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1c3f3c09e5b729cf0c2c0661a5535c95", + "grade": false, + "grade_id": "cell-079e531ebcb88c60", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "99850cb54bb10e3d9ef41eb1cfdcd231", + "grade": true, + "grade_id": "cell-547b82e7a64b9aa1", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.2 is not character\"= setequal(digest(paste(toString(class(answer1.2)), \"962df\")), \"edcf1b7f6d8db50d0ef71fe87b9ff265\"))\n", + "stopifnot(\"length of answer1.2 is not correct\"= setequal(digest(paste(toString(length(answer1.2)), \"962df\")), \"7f043cb37eae566922f1d03b91a3eeff\"))\n", + "stopifnot(\"value of answer1.2 is not correct\"= setequal(digest(paste(toString(tolower(answer1.2)), \"962df\")), \"63b747cced3f34f794ea789c80a52f56\"))\n", + "stopifnot(\"letters in string value of answer1.2 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.2), \"962df\")), \"8ead807c82b5f9abb9a8d8440d856001\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "90902cc596c86e241d127a643265bc37", + "grade": false, + "grade_id": "cell-6ae21507eed64700", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### Understanding Simple Linear Regression\n", + "\n", + "Consider this small and simple dataset: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f0f86068987ba71eba448bbf880edf7f", + "grade": false, + "grade_id": "cell-2850a0b99f14004c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "## run this code \n", + "simple_data <- tibble(X = c(1, 2, 3, 6, 7, 7),\n", + " Y = c(1, 1, 3, 5, 7, 6))\n", + "options(repr.plot.width = 5, repr.plot.height = 5)\n", + "base <- ggplot(simple_data, aes(x = X, y = Y)) +\n", + " geom_point(size = 2) +\n", + " scale_x_continuous(limits = c(0, 7.5), breaks = seq(0, 8), minor_breaks = seq(0, 8, 0.25)) +\n", + " scale_y_continuous(limits = c(0, 7.5), breaks = seq(0, 8), minor_breaks = seq(0, 8, 0.25)) +\n", + " theme(text = element_text(size = 20))\n", + "base " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d61fea0714dff7819361e1783a0ae096", + "grade": false, + "grade_id": "cell-158e53f25ab76890", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Now consider these three **potential** lines we could fit for the same dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "afece69f7ae5bd9698ddd517494f354a", + "grade": false, + "grade_id": "cell-72d1bbcacfd85b37", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.height = 3.5, repr.plot.width = 10)\n", + "line_a <- base +\n", + " ggtitle(\"Line A\") +\n", + " geom_abline(intercept = -0.897, slope = 0.9834, color = \"blue\") +\n", + " theme(text = element_text(size = 20))\n", + "line_b <- base +\n", + " ggtitle(\"Line B\") +\n", + " geom_abline(intercept = 0.1022, slope = 0.9804, color = \"purple\") +\n", + " theme(text = element_text(size = 20))\n", + "line_c <- base +\n", + " ggtitle(\"Line C\") +\n", + " geom_abline(intercept = -0.2347, slope = 0.9164, color = \"green\") +\n", + " theme(text = element_text(size = 20))\n", + "plot_grid(line_a, line_b, line_c, ncol = 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "76961602fb170179a7db9f5d5a5524d8", + "grade": false, + "grade_id": "cell-d7bb06c12cba1681", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.0**\n", + "
{points: 1}\n", + "\n", + "Use the graph below titled \"Line A\" to roughly calculate the average squared vertical distance between the points and the blue line. Read values off the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.0`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a063f46b4bc24b65cf63dc98fe1d847c", + "grade": false, + "grade_id": "cell-4d84f8c3727420a2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#run this code\n", + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7b96aa9e4ee450dba46cb8c4faf86ede", + "grade": false, + "grade_id": "cell-6cf53cb30ae3cd16", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "2eb0aea7e3990bcf2277e1dba4b5b51e", + "grade": true, + "grade_id": "cell-9d0a3c3a16e0f47b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.0, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.0, 2))), \"c421d\")), \"60b51a2c6c08e835154fbd3a85eafb69\"))\n", + "stopifnot(\"value of round(answer2.0, 2) is not correct (rounded to 2 
decimal places)\"= setequal(digest(paste(toString(round(round(answer2.0, 2), 2)), \"c421d\")), \"b5753e2357b9e3c512b25411a68fdd61\"))\n", + "stopifnot(\"length of round(answer2.0, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.0, 2))), \"c421d\")), \"aae0eb53bb949b3c6130cd8e6a07f130\"))\n", + "stopifnot(\"values of round(answer2.0, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.0, 2), 2))), \"c421d\")), \"b5753e2357b9e3c512b25411a68fdd61\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6b0ec9b6ebc65c04901b41672a826102", + "grade": false, + "grade_id": "cell-7ef45d3d7c403c81", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.1**\n", + "
{points: 1}\n", + "\n", + "Use the graph titled \"Line B\" to roughly calculate the average squared vertical distance between the points and the purple line. Read values off the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.1`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "444b814cf0b07058cd7a96d78d16878f", + "grade": false, + "grade_id": "cell-be8bd2be4d762d37", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "334b4c6cae6c3456dd810592a4d52ad0", + "grade": false, + "grade_id": "cell-be5564f4bd6cd576", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "82f34209479b394700790fc00386743b", + "grade": true, + "grade_id": "cell-840f1140c7655088", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.1, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.1, 2))), \"26b01\")), \"73fe4642b108c0c6fb9e31c5638f220d\"))\n", + "stopifnot(\"value of round(answer2.1, 2) is not correct (rounded to 2 decimal places)\"= 
setequal(digest(paste(toString(round(round(answer2.1, 2), 2)), \"26b01\")), \"7450db9b8c0f6a7562640316dfa0bc01\"))\n", + "stopifnot(\"length of round(answer2.1, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.1, 2))), \"26b01\")), \"371c4d802308bc29de8f5243443264fb\"))\n", + "stopifnot(\"values of round(answer2.1, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.1, 2), 2))), \"26b01\")), \"7450db9b8c0f6a7562640316dfa0bc01\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "0865e28d085d526f20585bc4291aa24b", + "grade": false, + "grade_id": "cell-69f334fbd3120d91", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.2** \n", + "
{points: 1}\n", + "\n", + "Use the graph titled \"Line C\" to roughly calculate the average squared vertical distance between the points and the green line. Read values off the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.2`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bf9b8c6e27fa701139200bfc1c3b4710", + "grade": false, + "grade_id": "cell-0634261679ff7469", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e551479b4f19ab9328aa8b172ab46c9e", + "grade": false, + "grade_id": "cell-3aed32faefe82978", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5a32ee2a0224a1cbb04ba4d91910b608", + "grade": true, + "grade_id": "cell-3e544bd712b4d796", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.2))), \"b6f4e\")), \"a9ff9b86408482a29adf68825849a2fc\"))\n", + "stopifnot(\"value of round(answer2.2) is not correct (rounded to 2 decimal places)\"= 
setequal(digest(paste(toString(round(round(answer2.2), 2)), \"b6f4e\")), \"10d782e505a4d946c033ae64a61f01d5\"))\n", + "stopifnot(\"length of round(answer2.2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.2))), \"b6f4e\")), \"787c683899af26cd449f1d7825f44c94\"))\n", + "stopifnot(\"values of round(answer2.2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.2), 2))), \"b6f4e\")), \"10d782e505a4d946c033ae64a61f01d5\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4fecdd64a1522109a436c961d7ee241f", + "grade": false, + "grade_id": "cell-ca359fbdc2020d22", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.3**\n", + "
{points: 1}\n", + "\n", + "Based on your calculations above, which line would linear regression by ordinary least squares choose given our small and simple dataset? Line A, B or C? Assign the letter that corresponds to the line to a variable named `answer2.3`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3213e56ff5f24a8c6ddf10c6680d10f4", + "grade": false, + "grade_id": "cell-c3bc0fc1f61fb31b", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "81314417b4cbf6c1b22ac22128ee0b79", + "grade": true, + "grade_id": "cell-4ae1ac995c661109", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer2.3 is not character\"= setequal(digest(paste(toString(class(answer2.3)), \"6e40a\")), \"31581ca444e3e522b6799848cc7d57b4\"))\n", + "stopifnot(\"length of answer2.3 is not correct\"= setequal(digest(paste(toString(length(answer2.3)), \"6e40a\")), \"46ae7667ffe66fc339f6d8552bf49ec5\"))\n", + "stopifnot(\"value of answer2.3 is not correct\"= setequal(digest(paste(toString(tolower(answer2.3)), \"6e40a\")), \"c3b8ffae6f3f1f2abc0c0e8c62432cc8\"))\n", + "stopifnot(\"letters in string value of answer2.3 are correct but case is not correct\"= setequal(digest(paste(toString(answer2.3), \"6e40a\")), \"7dac3ef201af4beeadb8007c86d45e40\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": 
"markdown", + "checksum": "49453d76b4c2fd8fe5322b4875396ce0", + "grade": false, + "grade_id": "cell-c450e26cb57e9dc9", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## Marathon Training Revisited with Linear Regression!\n", + "\n", + "\n", + "\n", + "Source: https://media.giphy.com/media/BDagLpxFIm3SM/giphy.gif\n", + "\n", + "Remember our question from last week: what features predict whether athletes will perform better than others? Specifically, we are interested in marathon runners, and in how the maximum distance run per week during training predicts the time it takes a runner to finish the race. \n", + "\n", + "This time around, however, we will analyze the data using simple linear regression rather than $k$-nn regression. In the end, we will compare our results to what we found last week with $k$-nn regression." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b871fcac04015c642b4c42f5908d6912", + "grade": false, + "grade_id": "cell-8655bd26820bea69", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.0**\n", + "
{points: 1}\n", + "\n", + "Load the `marathon` data and assign it to an object called `marathon`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d1a30ea92494ef32d2b05174ef0d8938", + "grade": false, + "grade_id": "cell-7429888f4a5a274a", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8c2a3c9d3034fd0f6f44deb166a2f39f", + "grade": true, + "grade_id": "cell-36eda8d5f9545c0e", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon)), \"1acc1\")), \"ae70b371b8f45716725ad6ed47342dfc\"))\n", + "stopifnot(\"dimensions of marathon are not correct\"= setequal(digest(paste(toString(dim(marathon)), \"1acc1\")), \"b9802a9b3dddfe0ee07a7e2f6e7d0cdc\"))\n", + "stopifnot(\"column names of marathon are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon))), \"1acc1\")), \"a0d9dead321d7baca4daef454c2ba1e5\"))\n", + "stopifnot(\"types of columns in marathon are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon, class)))), \"1acc1\")), \"2b2cd994f5a194a03aa1042b91121b6e\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.numeric))) sort(round(sapply(marathon[, sapply(marathon, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"1acc1\")), \"d6e3e150abbd8e2ff830c3a43e8fddc0\"))\n", + "stopifnot(\"values in one or more character 
columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.character))) sum(sapply(marathon[sapply(marathon, is.character)], function(x) length(unique(x)))) else 0), \"1acc1\")), \"897000072cc9ce82681f2c1dd68b3a00\"))\n", + "stopifnot(\"values in one or more factor columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.factor))) sum(sapply(marathon[, sapply(marathon, is.factor)], function(col) length(unique(col)))) else 0), \"1acc1\")), \"897000072cc9ce82681f2c1dd68b3a00\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1bdfd007331d2a152db48c3bbb51d04c", + "grade": false, + "grade_id": "cell-439c2ccda058add2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.1**\n", + "
{points: 1}\n", + "\n", + "Similar to what we have done for the last few weeks, we will first split the dataset into the training and testing datasets, using 75% of the original data as the training data. Remember, we will be putting the test dataset away in a 'lock box' that we will come back to later after we choose our final model. In the `strata` argument of the `initial_split` function, place the variable we are trying to predict. Assign your split dataset to an object named `marathon_split`. \n", + "\n", + "Assign your training dataset to an object named `marathon_training` and your testing dataset to an object named `marathon_testing`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "867d9e669e311f27397efb3b1e4878bc", + "grade": false, + "grade_id": "cell-4862845e3f18eced", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2000) # DO NOT CHANGE THIS\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6bc3ab4c555bab5f742c15a576e39df4", + "grade": true, + "grade_id": "cell-b1eb46161667bcec", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of 'rsplit' %in% class(marathon_split) is not logical\"= setequal(digest(paste(toString(class('rsplit' %in% class(marathon_split))), \"71ac4\")), \"870c923565a346b32afd6edd74a1fcc4\"))\n", + "stopifnot(\"logical value of 'rsplit' %in% class(marathon_split) is not correct\"= setequal(digest(paste(toString('rsplit' %in% class(marathon_split)), \"71ac4\")), \"c128f7d08e1a97bd5d7130fe20bae29f\"))\n", + "\n", + "stopifnot(\"marathon_training should be 
a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_training)), \"71ac5\")), \"42de1b199d3ae516182745019109542b\"))\n", + "stopifnot(\"dimensions of marathon_training are not correct\"= setequal(digest(paste(toString(dim(marathon_training)), \"71ac5\")), \"dba79e5d0afe650395d1750b8bf39259\"))\n", + "stopifnot(\"column names of marathon_training are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_training))), \"71ac5\")), \"0cab7459f91e73487ade5163168e4af7\"))\n", + "stopifnot(\"types of columns in marathon_training are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_training, class)))), \"71ac5\")), \"42b6d84ea44fe5d15bebb1297ad9326a\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.numeric))) sort(round(sapply(marathon_training[, sapply(marathon_training, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"71ac5\")), \"fa950c5c01ce9c933b6f68f1817b2db9\"))\n", + "stopifnot(\"values in one or more character columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.character))) sum(sapply(marathon_training[sapply(marathon_training, is.character)], function(x) length(unique(x)))) else 0), \"71ac5\")), \"30421dcc571f50e1cf88d0ab99a762bb\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.factor))) sum(sapply(marathon_training[, sapply(marathon_training, is.factor)], function(col) length(unique(col)))) else 0), \"71ac5\")), \"30421dcc571f50e1cf88d0ab99a762bb\"))\n", + "\n", + "stopifnot(\"marathon_testing should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_testing)), \"71ac6\")), \"e04fd132545851d3ea14dd4f5aa6e235\"))\n", + "stopifnot(\"dimensions of marathon_testing are 
not correct\"= setequal(digest(paste(toString(dim(marathon_testing)), \"71ac6\")), \"c9610fb3584cf217219e358ef972e7e4\"))\n", + "stopifnot(\"column names of marathon_testing are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_testing))), \"71ac6\")), \"22c3445ff5c2cd059b263b6b79f27ab2\"))\n", + "stopifnot(\"types of columns in marathon_testing are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_testing, class)))), \"71ac6\")), \"e119200748401e4b4bec40953e8bc6df\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.numeric))) sort(round(sapply(marathon_testing[, sapply(marathon_testing, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"71ac6\")), \"3e0e18b649424c4c945ca0145bef2800\"))\n", + "stopifnot(\"values in one or more character columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.character))) sum(sapply(marathon_testing[sapply(marathon_testing, is.character)], function(x) length(unique(x)))) else 0), \"71ac6\")), \"6b860e416b6e8e441818ec6be5d4995d\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.factor))) sum(sapply(marathon_testing[, sapply(marathon_testing, is.factor)], function(col) length(unique(col)))) else 0), \"71ac6\")), \"6b860e416b6e8e441818ec6be5d4995d\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ada7d473076efb49c499804ec2b64090", + "grade": false, + "grade_id": "cell-0ebfe08674f42eae", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.2**\n", + "
{points: 1}\n", + "\n", + "Using only the observations in the training dataset, create a scatterplot to assess the relationship between race time (`time_hrs`) and maximum distance run per week during training (`max`). Put `time_hrs` on the y-axis and `max` on the x-axis. Assign this plot to an object called `marathon_eda`. Remember to do whatever is necessary to make this an effective visualization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7f2e15b170d6d4bf45138d839d8b6a42", + "grade": false, + "grade_id": "cell-4a1c52e071e0b23e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.height = 8, repr.plot.width = 7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_eda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9fe7eedbc69e1feea544ecaa584f0960", + "grade": true, + "grade_id": "cell-883edd273699e4b7", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(marathon_eda$layers)), function(i) {c(class(marathon_eda$layers[[i]]$geom))[1]})), \"11fac\")), \"4459ab0fafff46beffafd74322e39f33\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_eda$layers)), function(i) {rlang::get_expr(c(marathon_eda$layers[[i]]$mapping, marathon_eda$mapping)$x)}), as.character))), \"11fac\")), \"ff45305bbbd821a02854aa03e229b841\"))\n", + "stopifnot(\"variable y is not correct\"= 
setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_eda$layers)), function(i) {rlang::get_expr(c(marathon_eda$layers[[i]]$mapping, marathon_eda$mapping)$y)}), as.character))), \"11fac\")), \"63e3299ade73457551067f0c882c5a8c\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$x)!= marathon_eda$labels$x), \"11fac\")), \"0f8c484bf7bc498eb68af14f6135c7ce\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$y)!= marathon_eda$labels$y), \"11fac\")), \"0f8c484bf7bc498eb68af14f6135c7ce\"))\n", + "stopifnot(\"incorrect colour variable in marathon_eda, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"incorrect shape variable in marathon_eda, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$shape)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"the colour label in marathon_eda is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour) != marathon_eda$labels$colour), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"the shape label in marathon_eda is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour) != marathon_eda$labels$shape), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"fill variable in marathon_eda 
is not correct\"= setequal(digest(paste(toString(quo_name(marathon_eda$mapping$fill)), \"11fac\")), \"4dd26f2adb5019221d751562ad1605ea\"))\n", + "stopifnot(\"fill label in marathon_eda is not informative\"= setequal(digest(paste(toString((quo_name(marathon_eda$mapping$fill) != marathon_eda$labels$fill)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"position argument in marathon_eda is not correct\"= setequal(digest(paste(toString(class(marathon_eda$layers[[1]]$position)[1]), \"11fac\")), \"7bc0a64ae1fbaf25265604c8c06f747f\"))\n", + "\n", + "stopifnot(\"marathon_eda$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_eda$data)), \"11fad\")), \"f45fafe1072c34829ea6fb8e233b9781\"))\n", + "stopifnot(\"dimensions of marathon_eda$data are not correct\"= setequal(digest(paste(toString(dim(marathon_eda$data)), \"11fad\")), \"1300556bdc994b3feb7c50db1edef550\"))\n", + "stopifnot(\"column names of marathon_eda$data are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_eda$data))), \"11fad\")), \"5f03d7c878564320eb15d49ccdbc6adb\"))\n", + "stopifnot(\"types of columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_eda$data, class)))), \"11fad\")), \"55d9d03b6a1fdb2d57671eaf35751163\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.numeric))) sort(round(sapply(marathon_eda$data[, sapply(marathon_eda$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"11fad\")), \"1dead0c6529f779942736f60674097c6\"))\n", + "stopifnot(\"values in one or more character columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.character))) sum(sapply(marathon_eda$data[sapply(marathon_eda$data, is.character)], function(x) length(unique(x)))) else 0), \"11fad\")), 
\"d0c94533be178ca3d7d3f13fae051b9e\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.factor))) sum(sapply(marathon_eda$data[, sapply(marathon_eda$data, is.factor)], function(col) length(unique(col)))) else 0), \"11fad\")), \"d0c94533be178ca3d7d3f13fae051b9e\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d195b1a04cc840d00ca166ae12c124ca", + "grade": false, + "grade_id": "cell-c66b91eb433ebef7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.3**\n", + "
{points: 1}\n", + "\n", + "Now that we have our training data, the next step is to build a linear regression model specification. Thankfully, building other model specifications is quite straightforward since we will still go through the same procedure (indicate the function, the engine and the mode). \n", + "\n", + "Instead of using the `nearest_neighbor` function, we will be using the `linear_reg` function to let `tidymodels` know we want to perform a linear regression. In the `set_engine` function, we have typically set `\"kknn\"` there for $k$-nn. Since we are doing a linear regression here, set `\"lm\"` as the engine. Finally, instead of setting `\"classification\"` as the mode, set `\"regression\"` as the mode. \n", + "\n", + "Assign your answer to an object named `lm_spec`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1530ffd4f16d72b585967be1e5cc154c", + "grade": false, + "grade_id": "cell-9736241c0c2966b9", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "31561987a5c3b62338c5cb3e308070ff", + "grade": true, + "grade_id": "cell-a647adab28a3dfb2", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(lm_spec)), \"d0889\")), \"a0dacbf88e81803337b8c7997b77466e\"))\n", + "stopifnot(\"model specification in lm_spec is not correct\"= setequal(digest(paste(toString(lm_spec$mode), \"d0889\")), \"875da79c33870ef8b6ae4c843ba107b1\"))\n", + 
"stopifnot(\"computational engine in lm_spec is not correct\"= setequal(digest(paste(toString(lm_spec$engine), \"d0889\")), \"574bc5e346e71d4f46b8c690ef333ed1\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f5dab0b1a9c4ff92a05207dba7fe1c6f", + "grade": false, + "grade_id": "cell-29d06cfa3e0ab3bc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.3.1**\n", + "
{points: 1}\n", + "\n", + "After we have created our linear regression model specification, the next step is to create a recipe, establish a workflow analysis and fit our simple linear regression model. \n", + "\n", + "First, create a recipe with the variables of interest (race time and max weekly training distance) using the training dataset and assign your answer to an object named `lm_recipe`. \n", + "\n", + "Then, create a workflow analysis with our model specification and recipe. Remember to fit the workflow on the training dataset as well. Assign your answer to an object named `lm_fit`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "303c4ba43630124883d8c60b0aa8039c", + "grade": false, + "grade_id": "cell-72f6968d56ed879f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#... <- recipe(... ~ ..., data = ...)\n", + "\n", + "#... <- workflow() |>\n", + "# add_recipe(...) |>\n", + "# add_model(...) 
|>\n", + "# fit(...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_fit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9586012afa3a9bef9434b9116dcf2119", + "grade": true, + "grade_id": "cell-9d3b1ff7fdd3faa5", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_recipe should be a recipe\"= setequal(digest(paste(toString('recipe' %in% class(lm_recipe)), \"75481\")), \"9f9d6a2ddbcb5fa648535344804606f9\"))\n", + "stopifnot(\"response variable of lm_recipe is not correct\"= setequal(digest(paste(toString(sort(filter(lm_recipe$var_info, role == 'outcome')$variable)), \"75481\")), \"b8ecc843c3922304280df63250a47bf0\"))\n", + "stopifnot(\"predictor variable(s) of lm_recipe are not correct\"= setequal(digest(paste(toString(sort(filter(lm_recipe$var_info, role == 'predictor')$variable)), \"75481\")), \"adbdaeb8f5967844a99791b00b963cd0\"))\n", + "stopifnot(\"lm_recipe does not contain the correct data, might need to be standardized\"= setequal(digest(paste(toString(round(sum(bake(prep(lm_recipe), lm_recipe$template) %>% select_if(is.numeric), na.rm = TRUE), 2)), \"75481\")), \"1f4c1cba79358be26d7b63a420618b8b\"))\n", + "\n", + "stopifnot(\"lm_fit should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(lm_fit)), \"75482\")), \"cce7000d9ae3d342f9246a159eecb980\"))\n", + "stopifnot(\"computational engine used in lm_fit is not correct\"= setequal(digest(paste(toString(lm_fit$fit$actions$model$spec$engine), \"75482\")), \"fe034b6280904548b52fc19abbe6edff\"))\n", + "stopifnot(\"model specification used in lm_fit is not correct\"= setequal(digest(paste(toString(lm_fit$fit$actions$model$spec$mode), \"75482\")), \"3cae09094e67e2f99a265c29a8d3918b\"))\n", + 
"stopifnot(\"lm_fit must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(lm_fit$trained), \"75482\")), \"cce7000d9ae3d342f9246a159eecb980\"))\n", + "stopifnot(\"predictor variable(s) of lm_fit are not correct\"= setequal(digest(paste(toString(sort(filter(lm_fit$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"75482\")), \"ff0c2ccbae3e98ee9211ee70e3ad1e8a\"))\n", + "stopifnot(\"lm_fit does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(lm_fit$pre$mold$predictors[, sapply(lm_fit$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"75482\")), \"be94b1f7d4616144f9d51afb14b8b17f\"))\n", + "stopifnot(\"did not fit lm_fit on the training dataset\"= setequal(digest(paste(toString(nrow(lm_fit$pre$mold$outcomes)), \"75482\")), \"ee77ee2ba1694b3dc410cf44a5e0efce\"))\n", + "stopifnot(\"for classification/regression models, weight function is not correct\"= setequal(digest(paste(toString(quo_name(lm_fit$fit$actions$model$spec$args$weight_func)), \"75482\")), \"a2a2f068d8e40043fd0b88d09299bd2e\"))\n", + "stopifnot(\"for classification/regression models, response variable of lm_fit is not correct\"= setequal(digest(paste(toString(sort(filter(lm_fit$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"75482\")), \"5d2ee0077f65d653f1f8d63a42599054\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(lm_fit$fit$actions$model$spec$args$neighbors)), \"75482\")), \"a2a2f068d8e40043fd0b88d09299bd2e\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(lm_fit$fit$fit$fit$cluster), \"75482\")), \"c2b79e4fa40c9504e66f260d9a9b56c6\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if 
(!is.null(lm_fit$fit$fit$fit$tot.withinss)) round(lm_fit$fit$fit$fit$tot.withinss, 2) else NULL), \"75482\")), \"c2b79e4fa40c9504e66f260d9a9b56c6\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1de9e767bea811b0f51ac4725b4b4b84", + "grade": false, + "grade_id": "cell-dad798df6163dff4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.4**\n", + "
{points: 1}\n", + "\n", + "Now, let's visualize the model predictions as a straight line overlaid on the training data. Use the `predict` and `bind_cols` functions on `lm_fit` to create predictions for the `marathon_training` data. Name the resulting data frame `marathon_preds`.\n", + "\n", + "Next, create a scatterplot with the marathon time (y-axis) against the maximum distance run per week (x-axis) from `marathon_preds`. Use an alpha value of 0.4 to avoid overplotting. **Plot the predictions as a blue line over the data points.** Assign your plot to a variable called `lm_predictions`. Remember the fundamentals of effective visualizations such as having human-readable axis titles. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4cbea9991bcdf02b0340eed13dd5be0a", + "grade": false, + "grade_id": "cell-6131349a47c37876", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "# marathon_preds <- ... |>\n", + "# predict(...) |>\n", + "# bind_cols(...)\n", + "#\n", + "# lm_predictions <- marathon_preds |>\n", + "# ...(aes(x = ..., y = ...)) +\n", + "# geom_point(... 
= 0.4) +\n", + "# geom_line(\n", + "# mapping = aes(x = ..., y = ...), \n", + "# color = \"blue\") +\n", + "# xlab(\"...\") +\n", + "# ylab(\"...\") +\n", + "# theme(text = ...(size = 20))\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c2acd42915d79e6c2235871620ec0fd8", + "grade": true, + "grade_id": "cell-4664079ebe7d0892", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_preds should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_preds)), \"92a43\")), \"9da11dcd29eb47f90ac1aefabe6c9455\"))\n", + "stopifnot(\"dimensions of marathon_preds are not correct\"= setequal(digest(paste(toString(dim(marathon_preds)), \"92a43\")), \"74088c002f1026d9e09caf9aa1594fa8\"))\n", + "stopifnot(\"column names of marathon_preds are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_preds))), \"92a43\")), \"3d11ae5f1d1aa863b880dfa9c0306f8d\"))\n", + "stopifnot(\"types of columns in marathon_preds are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_preds, class)))), \"92a43\")), \"dc8fa49c861bb94fc17b26fcd8616273\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.numeric))) sort(round(sapply(marathon_preds[, sapply(marathon_preds, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"92a43\")), \"2e034bbbff8419bdc580d52b81bff2ba\"))\n", + "stopifnot(\"values in one or more character columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.character))) 
sum(sapply(marathon_preds[sapply(marathon_preds, is.character)], function(x) length(unique(x)))) else 0), \"92a43\")), \"af2001c6c829297968ba45ced5235ec8\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.factor))) sum(sapply(marathon_preds[, sapply(marathon_preds, is.factor)], function(col) length(unique(col)))) else 0), \"92a43\")), \"af2001c6c829297968ba45ced5235ec8\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(lm_predictions$layers)), function(i) {c(class(lm_predictions$layers[[i]]$geom))[1]})), \"92a44\")), \"fb744d3c7ec20538e4f5cd822395cc85\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions$layers)), function(i) {rlang::get_expr(c(lm_predictions$layers[[i]]$mapping, lm_predictions$mapping)$x)}), as.character))), \"92a44\")), \"74c2844a5f83b6daa40aaec67fa2e92a\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions$layers)), function(i) {rlang::get_expr(c(lm_predictions$layers[[i]]$mapping, lm_predictions$mapping)$y)}), as.character))), \"92a44\")), \"1ddf82aafb1c88458884d9df5a673917\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$x)!= lm_predictions$labels$x), \"92a44\")), \"9d3822c574af2fed6d28ab707c284cce\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$y)!= lm_predictions$labels$y), \"92a44\")), 
\"9d3822c574af2fed6d28ab707c284cce\"))\n", + "stopifnot(\"incorrect colour variable in lm_predictions, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"incorrect shape variable in lm_predictions, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$shape)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"the colour label in lm_predictions is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour) != lm_predictions$labels$colour), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"the shape label in lm_predictions is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour) != lm_predictions$labels$shape), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"fill variable in lm_predictions is not correct\"= setequal(digest(paste(toString(quo_name(lm_predictions$mapping$fill)), \"92a44\")), \"8fc821fdbb622da7ac6e6263a813b77f\"))\n", + "stopifnot(\"fill label in lm_predictions is not informative\"= setequal(digest(paste(toString((quo_name(lm_predictions$mapping$fill) != lm_predictions$labels$fill)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"position argument in lm_predictions is not correct\"= setequal(digest(paste(toString(class(lm_predictions$layers[[1]]$position)[1]), \"92a44\")), \"44da79b989930014234165d79853228d\"))\n", + "\n", + "stopifnot(\"lm_predictions$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_predictions$data)), \"92a45\")), 
\"5b824784ce6d25ac0d227cfbc08effb6\"))\n", + "stopifnot(\"dimensions of lm_predictions$data are not correct\"= setequal(digest(paste(toString(dim(lm_predictions$data)), \"92a45\")), \"85a2b44d298b323abf26964fcdec67cb\"))\n", + "stopifnot(\"column names of lm_predictions$data are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_predictions$data))), \"92a45\")), \"98268a0def9229d53e704e820f4b9fb7\"))\n", + "stopifnot(\"types of columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_predictions$data, class)))), \"92a45\")), \"6e358fb6c724c4683b8bc75631365183\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.numeric))) sort(round(sapply(lm_predictions$data[, sapply(lm_predictions$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"92a45\")), \"249f484e2bd4edf34b3344f3dbbccac3\"))\n", + "stopifnot(\"values in one or more character columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.character))) sum(sapply(lm_predictions$data[sapply(lm_predictions$data, is.character)], function(x) length(unique(x)))) else 0), \"92a45\")), \"17108f98507ffe522490eb8f6ccdd547\"))\n", + "stopifnot(\"values in one or more factor columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.factor))) sum(sapply(lm_predictions$data[, sapply(lm_predictions$data, is.factor)], function(col) length(unique(col)))) else 0), \"92a45\")), \"17108f98507ffe522490eb8f6ccdd547\"))\n", + "\n", + "stopifnot(\"type of as.character(lm_predictions$layers[[2]]$aes_params) is not character\"= setequal(digest(paste(toString(class(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"a53836556fa0edf8ff013e891f1f9bd9\"))\n", + "stopifnot(\"length of 
as.character(lm_predictions$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(length(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"1483d186549c4800922240ea9731d38e\"))\n", + "stopifnot(\"value of as.character(lm_predictions$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(tolower(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"96b2060698cde202d2dec4eacaf0ef4a\"))\n", + "stopifnot(\"letters in string value of as.character(lm_predictions$layers[[2]]$aes_params) are correct but case is not correct\"= setequal(digest(paste(toString(as.character(lm_predictions$layers[[2]]$aes_params)), \"92a46\")), \"96b2060698cde202d2dec4eacaf0ef4a\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "51ea2afad1f6ea01d1549e3d933ac38d", + "grade": false, + "grade_id": "cell-c4273b717c7a3cb7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.5**\n", + "
{points: 1}\n", + "\n", + "Great! We can now see the line of best fit on the graph. Now let's calculate the $RMSPE$ using the **test data**. To get to this point, first, use the `lm_fit` to make predictions on the test data. Remember to bind the appropriate columns for the test data. Afterwards, collect the metrics and store them in an object called `lm_test_results`.\n", + "\n", + "From `lm_test_results`, extract the $RMSPE$ and return a single numerical value. Assign your answer to an object named `lm_rmspe`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9d5f238c35ed6d87f0820b963ef8e70b", + "grade": false, + "grade_id": "cell-5f42a9ac9068cfdf", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "#... <- lm_fit |>\n", + "# predict(...) |>\n", + "# bind_cols(...) |>\n", + "# metrics(truth = ..., estimate = ...)\n", + "\n", + "#... <- lm_test_results |>\n", + "# filter(...) |>\n", + "# select(...) 
|>\n", + "# ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_rmspe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6443934a920e993bfe92b39c5786aaa0", + "grade": true, + "grade_id": "cell-96a0627f99b93667", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_test_results should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_test_results)), \"897a7\")), \"b810641e52df8b085ad482ea493a8e75\"))\n", + "stopifnot(\"dimensions of lm_test_results are not correct\"= setequal(digest(paste(toString(dim(lm_test_results)), \"897a7\")), \"9770727e09ce49505b380eeecf31decb\"))\n", + "stopifnot(\"column names of lm_test_results are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_test_results))), \"897a7\")), \"3b139e12b7dcae96919fad7daf30dc88\"))\n", + "stopifnot(\"types of columns in lm_test_results are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_test_results, class)))), \"897a7\")), \"3a394aca9254bf32b1659200c8bf5307\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.numeric))) sort(round(sapply(lm_test_results[, sapply(lm_test_results, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"897a7\")), \"88bd1c199e045c8b65b425f2deed668b\"))\n", + "stopifnot(\"values in one or more character columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.character))) sum(sapply(lm_test_results[sapply(lm_test_results, is.character)], function(x) length(unique(x)))) else 0), \"897a7\")), \"855933d0f5e22f016f6baddd4ffaac61\"))\n", + "stopifnot(\"values in one 
or more factor columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.factor))) sum(sapply(lm_test_results[, sapply(lm_test_results, is.factor)], function(col) length(unique(col)))) else 0), \"897a7\")), \"1e2484c0e0b3dd4af86bcff5e3d1b54f\"))\n", + "\n", + "stopifnot(\"type of round(lm_rmspe, 2) is not numeric\"= setequal(digest(paste(toString(class(round(lm_rmspe, 2))), \"897a8\")), \"1aa278f1def9d912be586c16e682e781\"))\n", + "stopifnot(\"value of round(lm_rmspe, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(lm_rmspe, 2), 2)), \"897a8\")), \"971da5ed9e27704013c9e9add30f4846\"))\n", + "stopifnot(\"length of round(lm_rmspe, 2) is not correct\"= setequal(digest(paste(toString(length(round(lm_rmspe, 2))), \"897a8\")), \"451753c10c17df6c5e9b434ce9093dff\"))\n", + "stopifnot(\"values of round(lm_rmspe, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(lm_rmspe, 2), 2))), \"897a8\")), \"971da5ed9e27704013c9e9add30f4846\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "703a97273e060a23864acb4febf15f4d", + "grade": false, + "grade_id": "cell-248f1e3467b09c40", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.5.1**\n", + "
{points: 1}\n", + "\n", + "Now, let's visualize the model predictions as a straight line overlaid on the test data. First, create a scatterplot to assess the relationship between race time (`time_hrs`) and maximum distance run per week during training (`max`) on the **testing data.** Use an alpha value of 0.4 to avoid overplotting. Then add a line to the plot corresponding to the predictions from the fitted linear regression model. Remember to do whatever is necessary to make this an effective visualization.\n", + "\n", + "*Assign the plot to an object called `lm_predictions_test`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d8e6bd06a9c30160ca9342bae87c9efa", + "grade": false, + "grade_id": "cell-498c80d6fa9369a7", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "# test_preds <- ...\n", + "\n", + "# lm_predictions_test <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_predictions_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8ac7774e84a41ae09dd036dff5221503", + "grade": true, + "grade_id": "cell-218d130332eebf2a", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"test_preds should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(test_preds)), \"34e17\")), \"b9c2981a0e8d6cf89e98fa7736d5f467\"))\n", + "stopifnot(\"dimensions of test_preds are not correct\"= setequal(digest(paste(toString(dim(test_preds)), \"34e17\")), \"e69a8e0f204485549c3aa56a8d089150\"))\n", + "stopifnot(\"column names 
of test_preds are not correct\"= setequal(digest(paste(toString(sort(colnames(test_preds))), \"34e17\")), \"4dbd60436948e508ce4d7dc43da278fc\"))\n", + "stopifnot(\"types of columns in test_preds are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(test_preds, class)))), \"34e17\")), \"f4be2b89540633a23ce4264b495756f4\"))\n", + "stopifnot(\"values in one or more numerical columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.numeric))) sort(round(sapply(test_preds[, sapply(test_preds, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"34e17\")), \"4c5890ff8271bd200379f553c61394c1\"))\n", + "stopifnot(\"values in one or more character columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.character))) sum(sapply(test_preds[sapply(test_preds, is.character)], function(x) length(unique(x)))) else 0), \"34e17\")), \"44ecda4ed9d0f455b7d326734d48e06e\"))\n", + "stopifnot(\"values in one or more factor columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.factor))) sum(sapply(test_preds[, sapply(test_preds, is.factor)], function(col) length(unique(col)))) else 0), \"34e17\")), \"44ecda4ed9d0f455b7d326734d48e06e\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {c(class(lm_predictions_test$layers[[i]]$geom))[1]})), \"34e18\")), \"e996b7462be5db50fddea027a41e5d41\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {rlang::get_expr(c(lm_predictions_test$layers[[i]]$mapping, lm_predictions_test$mapping)$x)}), as.character))), \"34e18\")), \"dc26821c948a88627636fbaff8ba49fa\"))\n", + "stopifnot(\"variable y is not correct\"= 
setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {rlang::get_expr(c(lm_predictions_test$layers[[i]]$mapping, lm_predictions_test$mapping)$y)}), as.character))), \"34e18\")), \"f859c22b95960db245622f8bda7a7c48\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$x)!= lm_predictions_test$labels$x), \"34e18\")), \"d32017ce78d71b2418a43f5c2b1db675\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$y)!= lm_predictions_test$labels$y), \"34e18\")), \"d32017ce78d71b2418a43f5c2b1db675\"))\n", + "stopifnot(\"incorrect colour variable in lm_predictions_test, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$colour)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"incorrect shape variable in lm_predictions_test, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$shape)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"the colour label in lm_predictions_test is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$colour) != lm_predictions_test$labels$colour), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"the shape label in lm_predictions_test is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, 
lm_predictions_test$mapping)$colour) != lm_predictions_test$labels$shape), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"fill variable in lm_predictions_test is not correct\"= setequal(digest(paste(toString(quo_name(lm_predictions_test$mapping$fill)), \"34e18\")), \"a5333b2a9e34a3481fa9ab37923f2179\"))\n", + "stopifnot(\"fill label in lm_predictions_test is not informative\"= setequal(digest(paste(toString((quo_name(lm_predictions_test$mapping$fill) != lm_predictions_test$labels$fill)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"position argument in lm_predictions_test is not correct\"= setequal(digest(paste(toString(class(lm_predictions_test$layers[[1]]$position)[1]), \"34e18\")), \"cb745c6bdfda658a5f56a95dbea00f45\"))\n", + "\n", + "stopifnot(\"lm_predictions_test$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_predictions_test$data)), \"34e19\")), \"d7dad6698210cfacabf62c3bae8b5644\"))\n", + "stopifnot(\"dimensions of lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(dim(lm_predictions_test$data)), \"34e19\")), \"fa31829e597d4b489d7829cfb90bf196\"))\n", + "stopifnot(\"column names of lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_predictions_test$data))), \"34e19\")), \"7e6df8b1b9cb0ea102b917bf0ba32e62\"))\n", + "stopifnot(\"types of columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_predictions_test$data, class)))), \"34e19\")), \"469aeceaad0a86845823d2423e05da29\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.numeric))) sort(round(sapply(lm_predictions_test$data[, sapply(lm_predictions_test$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"34e19\")), \"72dd04320632d407e2e53309710b3ac5\"))\n", + 
"stopifnot(\"values in one or more character columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.character))) sum(sapply(lm_predictions_test$data[sapply(lm_predictions_test$data, is.character)], function(x) length(unique(x)))) else 0), \"34e19\")), \"cb8aa06bd1d7a4150bc39ea915c0afcc\"))\n", + "stopifnot(\"values in one or more factor columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.factor))) sum(sapply(lm_predictions_test$data[, sapply(lm_predictions_test$data, is.factor)], function(col) length(unique(col)))) else 0), \"34e19\")), \"cb8aa06bd1d7a4150bc39ea915c0afcc\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d83a8c2e08816813c9fe49068b2f3d6a", + "grade": false, + "grade_id": "cell-3379505df4a3aef4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.6**\n", + "
{points: 1}\n", + "\n", + "Compare the test RMSPE of $k$-nn regression (`0.606` from the last worksheet) to that of simple linear regression. Which is greater? \n", + "\n", + "A. $k$-nn regression has a greater RMSPE\n", + "\n", + "B. Simple linear regression has a greater RMSPE\n", + "\n", + "C. Neither, they are identical\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.6`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5ac95738cb9b73954da0c7aa71f20cac", + "grade": false, + "grade_id": "cell-e633b4ce6799432e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fedddf8df734c0c9f8d2cf6652489f82", + "grade": true, + "grade_id": "cell-c180c6230f13243c", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.6 is not character\"= setequal(digest(paste(toString(class(answer3.6)), \"1e1fa\")), \"e087a386b27f00004a34aa18a4121dc3\"))\n", + "stopifnot(\"length of answer3.6 is not correct\"= setequal(digest(paste(toString(length(answer3.6)), \"1e1fa\")), \"ae2d406db16c00044acce6f4fbfc8ae2\"))\n", + "stopifnot(\"value of answer3.6 is not correct\"= setequal(digest(paste(toString(tolower(answer3.6)), \"1e1fa\")), \"68ccef2326206925966a99e426d93ac4\"))\n", + "stopifnot(\"letters in string value of answer3.6 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.6), \"1e1fa\")), \"772b8e2399aa39350c32da02b2ae606c\"))\n", + 
"\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "55a6c6c328176a349ee3ff64edb84325", + "grade": false, + "grade_id": "cell-34ba4508e97d7316", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.7** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Which model does a better job of predicting on the test dataset?\n", + "\n", + "A. $k$-nn regression \n", + "\n", + "B. Simple linear regression \n", + "\n", + "C. Neither, they are identical\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.7`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9a51d640e578f07161efec55c23a645e", + "grade": false, + "grade_id": "cell-e090cdac97461555", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "78370634905ab1845eb2398607033420", + "grade": true, + "grade_id": "cell-9f902420da757d0a", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.7 is not character\"= setequal(digest(paste(toString(class(answer3.7)), \"624f2\")), \"3d482a2579ff60cd919540409cdc4bf3\"))\n", + "stopifnot(\"length of answer3.7 is not correct\"= setequal(digest(paste(toString(length(answer3.7)), \"624f2\")), \"a00854f1d6158b8ae184fde0094f66dc\"))\n", + "stopifnot(\"value of answer3.7 is not correct\"= setequal(digest(paste(toString(tolower(answer3.7)), \"624f2\")), \"3bd126b31a6045a72d864108a9877fb2\"))\n", + "stopifnot(\"letters in string value of answer3.7 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.7), \"624f2\")), \"b885703406c472a36caebd8883c84dde\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": 
false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a648f83d076164079d8a2aa6ba3ad841", + "grade": false, + "grade_id": "cell-e0b42a79e704b681", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Given that the linear regression model is a straight line, we can write our model as a mathematical equation. We can get the two numbers we need for this from the coefficients, `(Intercept)` and `max`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a9bff9106a8ff0da7ee39b23504b5bcc", + "grade": false, + "grade_id": "cell-950e45b9b52f7a59", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# run this cell\n", + "lm_fit" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1201b3ad79a96bfb1010e4ebff2b3c8e", + "grade": false, + "grade_id": "cell-936d12f8333ecff6", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.8.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Which of the following mathematical equations represents the model based on the numbers output in the cell above? \n", + "\n", + "A. $Predicted \\ race \\ time \\ (in \\ hours) = 4.88 - 0.02 * max \\ (in \\ miles)$\n", + "\n", + "B. $Predicted \\ race \\ time \\ (in \\ hours) = -0.02 + 4.88 * max \\ (in \\ miles)$\n", + "\n", + "C. $Predicted \\ max \\ (in \\ miles) = 4.88 - 0.02 * \\ race \\ time \\ (in \\ hours)$\n", + " \n", + "D. $Predicted \\ max \\ (in \\ miles) = -0.02 + 4.88 * \\ race \\ time \\ (in \\ hours)$\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.8.1`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c7048c87837b2ea657cdc7c5138cacc2", + "grade": false, + "grade_id": "cell-e2595c01d8d07897", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "babee6dcecbc245d813562388fdb3453", + "grade": true, + "grade_id": "cell-5884a2fd9625b2e8", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.8.1 is not character\"= setequal(digest(paste(toString(class(answer3.8.1)), \"f0d48\")), \"ea9605462745f55518552cc11ef8d4e2\"))\n", + "stopifnot(\"length of answer3.8.1 is not correct\"= setequal(digest(paste(toString(length(answer3.8.1)), \"f0d48\")), \"5f486c8d36f348ddcab47e29d7675259\"))\n", + "stopifnot(\"value of answer3.8.1 is not correct\"= 
setequal(digest(paste(toString(tolower(answer3.8.1)), \"f0d48\")), \"cd6167a585ec3f62fe3b556fcaa430ac\"))\n", + "stopifnot(\"letters in string value of answer3.8.1 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.8.1), \"f0d48\")), \"80181f4235abd8a5a976dbeb9f24b4f7\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0a3c609e5aad862abd5071ad3500be3d", + "grade": false, + "grade_id": "cell-79f423b84cd2fa5c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "source('cleanup.R')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/materials/R/worksheet_regression2/cleanup.R b/materials/R/worksheet_regression2/cleanup.R new file mode 100644 index 0000000..d78b62b --- /dev/null +++ b/materials/R/worksheet_regression2/cleanup.R @@ -0,0 +1 @@ +# clean up data files that students output diff --git a/materials/R/worksheet_regression2/data/marathon.csv b/materials/R/worksheet_regression2/data/marathon.csv new file mode 100644 index 0000000..d509d97 --- /dev/null +++ b/materials/R/worksheet_regression2/data/marathon.csv @@ -0,0 +1,930 @@ +age,bmi,female,footwear,group,injury,mf_d,mf_di,mf_ti,max,sprint,mf_s,time_hrs +35,23.5923233,0,2,1,2,42195,4,10295,60,1,4.098591549295775,2.8597222222222225 +33,22.51829529,0,2,2,2,42195,3,12292,50,0,3.432720468597462,3.4144444444444444 +38,25.56031227,0,2,3,1,42195,4,10980,65,0,3.842896174863388,3.05 +34,22.60793114,0,2,1,1,42195,3,10694,88,1,3.945670469422106,2.9705555555555554 
+39,24.97483635,0,2,1,1,42195,2,13452,51,0,3.136708296164139,3.7366666666666664 +33,24.30183029,1,2,2,1,42195,3,14940,40,0,2.82429718875502,4.15 +34,24.57002449,0,1,3,1,42195,3,10747,75,1,3.926211966130083,2.9852777777777777 +53,23.16774559,0,2,1,1,42195,3,10875,45,1,3.88,3.0208333333333335 +34,23.58257103,1,2,2,1,42195,3,16580,39,0,2.5449336550060315,4.605555555555555 +44,20.03506279,1,2,3,1,42195,2,15440,45,1,2.732836787564767,4.288888888888889 +27,29.88360977,0,1,1,1,42195,4,14430,28,1,2.924116424116424,4.008333333333334 +27,21.38561058,0,2,2,1,42195,2,9475,110,1,4.453298153034301,2.631944444444444 +39,29.73737335,0,2,1,2,42195,4,13113,52,0,3.2177991306337224,3.6425 +32,29.21779823,1,2,3,1,42195,2,17190,55,0,2.4546247818499127,4.775 +24,21.70493698,1,2,3,2,42195,2,12701,50,1,3.322179355956224,3.5280555555555555 +35,30.05259323,1,2,1,3,42195,3,22139,30,0,1.9059126428474638,6.1497222222222225 +63,24.48979568,0,2,2,2,42195,2,15172,30,0,2.7811099393619827,4.214444444444444 +47,24.55106735,0,2,3,2,42195,2,14416,52,1,2.9269561598224194,4.004444444444444 +28,18.86635017,1,2,3,1,42195,2,11707,62,0,3.6042538652088494,3.2519444444444447 +42,25.10309982,0,2,2,1,42195,3,14813,30,0,2.848511442651725,4.114722222222222 +58,26.4293766,0,1,3,1,42195,2,14071,70,0,2.9987207732215193,3.9086111111111115 +33,21.75764084,1,2,3,2,42195,3,13337,120,0,3.1637549673839693,3.7047222222222222 +33,22.51829529,0,1,3,1,42195,3,9508,135,1,4.437841817416912,2.641111111111111 +24,22.53944397,1,2,2,1,42195,4,15420,30,0,2.7363813229571985,4.283333333333333 +47,22.49134827,0,2,1,1,42195,4,12312,40,1,3.4271442495126707,3.42 +26,25.25252533,1,2,1,2,42195,2,16899,34,1,2.4968933072962898,4.694166666666666 +52,23.80480194,0,1,1,1,42195,4,13257,40,1,3.182846797918081,3.6824999999999997 +33,28.2003727,1,2,2,1,42195,3,18484,15,0,2.2827851114477387,5.134444444444444 +50,19.17613602,1,2,2,1,42195,2,10718,80,1,3.936835230453443,2.977222222222222 
+27,20.77414703,1,2,2,1,42195,2,12095,70,1,3.488631665977677,3.3597222222222225 +47,25.24751663,0,3,1,1,42195,2,10757,60,1,3.9225620526169007,2.9880555555555555 +28,20.91324425,1,1,1,1,42195,2,10888,86,1,3.875367376928729,3.0244444444444443 +37,27.4961338,0,2,3,1,42195,2,14700,42,0,2.870408163265306,4.083333333333333 +31,22.53840446,0,1,2,2,42195,3,12323,45,1,3.424085044226244,3.4230555555555555 +31,24.52615929,0,1,3,1,42195,2,10778,7,0,3.914919280014845,2.993888888888889 +25,24.95965385,1,1,2,1,42195,3,14985,45,1,2.815815815815816,4.1625 +28,19.92750168,1,2,3,3,42195,2,12151,55,0,3.4725536992840094,3.375277777777778 +43,25.24778557,0,2,2,1,42195,3,17400,36,0,2.425,4.833333333333333 +26,19.86567116,1,2,2,1,42195,2,10461,94,1,4.033553197591052,2.9058333333333333 +30,20.57555008,1,2,2,1,42195,4,12719,72,1,3.317477789134366,3.5330555555555554 +36,24.34137344,0,2,3,1,42195,3,13530,51,1,3.1186252771618626,3.7583333333333333 +34,27.51718903,0,2,2,1,42195,3,14939,43,0,2.824486244059174,4.149722222222222 +40,28.44444466,0,2,1,1,42195,4,11611,82,1,3.6340539143915254,3.225277777777778 +38,23.37423897,1,2,2,2,42195,3,13690,35,0,3.0821767713659605,3.8027777777777776 +37,23.84960938,0,2,3,1,42195,3,12566,42,1,3.3578704440553877,3.4905555555555554 +40,21.5213356,0,2,2,1,42195,3,10698,70,1,3.9441951766685364,2.9716666666666667 +33,21.3521862,0,2,1,1,42195,3,13040,40,1,3.235812883435583,3.6222222222222222 +54,21.70465088,0,2,1,1,42195,4,12641,52,0,3.3379479471560796,3.511388888888889 +38,23.80480194,0,1,3,1,42195,3,13860,50,0,3.044372294372294,3.85 +29,18.63140297,1,2,3,1,42195,4,13632,51,1,3.0952904929577465,3.7866666666666666 +31,24.37873268,0,1,3,1,42195,3,15420,34,1,2.7363813229571985,4.283333333333333 +48,28.05836296,0,2,2,1,42195,3,18300,30,0,2.305737704918033,5.083333333333333 +34,20.19801331,0,2,1,1,42195,3,11453,70,1,3.6841875491137692,3.181388888888889 +44,25.20478821,0,2,2,1,42195,4,13458,55,1,3.135309852875613,3.7383333333333337 
+64,24.81470108,0,2,1,1,42195,3,14807,60,1,2.849665698656041,4.1130555555555555 +55,25.11189079,0,2,1,1,42195,3,14188,50,1,2.973992106005075,3.9411111111111112 +28,19.66026878,0,2,1,2,42195,4,10080,70,0,4.186011904761905,2.8 +36,22.60793114,0,1,2,1,42195,3,10040,70,1,4.202689243027889,2.788888888888889 +29,27.19741821,1,2,1,2,42195,3,17341,44,0,2.4332506775849145,4.816944444444444 +28,24.34137344,0,2,3,2,42195,3,13840,35,1,3.048771676300578,3.844444444444444 +36,22.857143399999998,0,1,1,2,42195,4,10652,60,1,3.961227938415321,2.9588888888888887 +39,23.37423897,1,2,1,1,42195,3,13148,40,1,3.209233343474293,3.652222222222222 +27,20.20201874,1,2,2,2,42195,2,14100,50,1,2.9925531914893617,3.9166666666666665 +31,22.80591202,1,1,2,2,42195,3,15060,35,0,2.801792828685259,4.183333333333334 +34,23.80480194,1,2,2,1,42195,2,16050,35,1,2.6289719626168226,4.458333333333333 +60,23.87511635,0,2,1,1,42195,3,13767,42,0,3.0649378949662234,3.8241666666666663 +27,24.38188744,0,2,2,2,42195,3,15120,55,0,2.7906746031746033,4.2 +30,21.38588142,0,2,3,1,42195,2,10211,70,1,4.132308294976006,2.836388888888889 +60,24.37873268,0,2,1,1,42195,4,12480,55,1,3.3810096153846154,3.466666666666667 +38,22.37248611,1,2,1,2,42195,3,12420,60,1,3.3973429951690823,3.45 +35,28.25022507,0,1,1,1,42195,4,13147,52,0,3.2094774473263863,3.6519444444444447 +43,22.80591202,0,2,1,1,42195,3,11107,63,1,3.7989556135770237,3.085277777777778 +33,25.11189079,0,1,3,1,42195,4,11056,58,1,3.8164797395079595,3.071111111111111 +49,22.13814735,0,2,1,1,42195,2,14570,60,1,2.89601921757035,4.0472222222222225 +25,22.35768318,0,2,2,2,42195,2,9301,80,0,4.536608966777766,2.5836111111111113 +28,20.77922058,1,2,3,1,42195,2,10691,85,1,3.9467776634552427,2.9697222222222224 +39,21.70493698,1,2,2,1,42195,4,15971,38,0,2.641976081647987,4.436388888888889 +23,22.80591202,0,2,2,1,42195,3,10353,70,0,4.07563025210084,2.8758333333333335 +47,24.48979568,0,2,1,1,42195,3,12111,55,1,3.484022789199901,3.3641666666666667 
+32,29.34202003,0,2,1,1,42195,4,12600,38,1,3.348809523809524,3.5 +31,23.30241394,1,2,3,3,42195,3,12123,50,1,3.4805741153179905,3.3675 +43,25.23407936,0,2,2,1,42195,2,13800,52,1,3.0576086956521737,3.8333333333333335 +38,20.41903305,1,2,3,2,42195,2,13120,62,1,3.216082317073171,3.6444444444444444 +36,22.91344261,0,2,2,2,42195,2,12423,55,0,3.396522579087177,3.4508333333333336 +43,19.64085388,0,2,3,1,42195,2,11622,57,0,3.6306143520908623,3.228333333333333 +35,25.16514397,1,3,2,1,42195,4,14453,35,1,2.9194630872483223,4.014722222222222 +51,34.34582138,0,2,2,1,42195,3,18960,45,0,2.225474683544304,5.266666666666667 +28,23.47361565,0,2,1,1,42195,2,11010,72,1,3.832425068119891,3.058333333333333 +39,22.84348106,0,2,1,1,42195,2,11148,40,1,3.784983853606028,3.0966666666666667 +31,20.3689785,1,1,2,1,42195,3,15009,41,0,2.8113132120727564,4.1691666666666665 +54,21.2900238,0,2,1,2,42195,2,10081,100,1,4.185596666997322,2.800277777777778 +26,30.05259323,1,2,1,1,42195,2,15743,32,1,2.6802388363082006,4.373055555555555 +38,21.69139481,0,2,2,2,42195,2,9627,105,1,4.382985353692739,2.6741666666666664 +34,25.04382706,0,2,3,1,42195,4,10780,50,0,3.9141929499072354,2.9944444444444445 +31,20.55720139,1,2,1,2,42195,2,12493,35,0,3.3774913951813015,3.470277777777778 +50,22.60793114,0,1,3,1,42195,4,12052,68,1,3.5010786591437104,3.347777777777778 +45,22.62626266,0,2,3,1,42195,2,10593,65,1,3.983290852449731,2.9425000000000003 +36,22.33406639,0,2,3,1,42195,4,8777,86,1,4.807451293152558,2.4380555555555556 +27,23.20066833,0,1,2,1,42195,3,12960,60,0,3.255787037037037,3.6 +39,23.49176788,1,2,3,2,42195,2,17660,55,0,2.3892978482446208,4.905555555555555 +34,22.26345062,0,2,1,1,42195,3,10425,60,0,4.047482014388489,2.8958333333333335 +47,25.66305733,0,1,3,1,42195,3,11898,38,1,3.5463943519919314,3.305 +44,19.62345314,1,2,3,1,42195,2,9838,22,1,4.28898150030494,2.7327777777777778 +35,22.91344261,0,1,1,1,42195,2,10137,65,1,4.162474104764724,2.815833333333333 
+36,22.97629166,0,2,3,1,42195,3,15508,38,0,2.720853752901728,4.307777777777777 +53,23.00556564,0,1,3,1,42195,4,12255,50,1,3.4430844553243576,3.404166666666667 +37,20.34231186,0,1,3,1,42195,4,11352,50,0,3.7169661733615222,3.1533333333333333 +32,24.85795403,1,2,1,1,42195,4,14400,40,0,2.9302083333333333,4 +37,23.46041107,1,2,2,3,42195,4,17160,40,1,2.4589160839160837,4.766666666666667 +38,21.18406296,0,2,2,2,42195,2,12005,52,1,3.5147855060391504,3.3347222222222226 +24,21.06158638,1,2,1,3,42195,2,13530,60,1,3.1186252771618626,3.7583333333333333 +34,24.30183029,0,2,1,2,42195,3,12507,40,0,3.3737107219956823,3.4741666666666666 +35,24.85795403,1,2,1,3,42195,2,15527,40,1,2.7175243124879245,4.313055555555556 +25,23.08238602,1,2,2,1,42195,2,12048,60,0,3.502241035856574,3.3466666666666667 +30,21.69922447,1,2,2,2,42195,3,10334,85,1,4.083123669440681,2.8705555555555553 +37,28.49721718,0,2,1,1,42195,4,16474,50,0,2.5613087289061554,4.5761111111111115 +26,22.19460106,1,2,1,2,42195,3,20355,35,1,2.072955047899779,5.654166666666667 +33,21.23309135,0,3,2,1,42195,2,11440,65,1,3.688374125874126,3.1777777777777776 +37,21.69139481,0,2,3,1,42195,3,9290,108,1,4.541980624327233,2.5805555555555557 +26,19.44146538,1,2,3,1,42195,2,16226,30,0,2.6004560581782323,4.5072222222222225 +37,23.00556564,1,2,1,2,42195,2,16636,60,1,2.536366915123828,4.6211111111111105 +24,24.30183029,1,2,1,1,42195,3,16660,34,1,2.5327130852340938,4.627777777777778 +23,20.41903305,1,2,3,1,42195,3,17153,35,0,2.459919547601003,4.764722222222222 +44,21.68908119,1,2,1,1,42195,3,15060,55,1,2.801792828685259,4.183333333333334 +32,19.92143822,0,2,2,2,42195,3,10080,67,1,4.186011904761905,2.8 +41,21.38588142,0,2,3,3,42195,3,15617,48,0,2.701863354037267,4.338055555555556 +32,26.20307159,1,2,1,2,42195,3,14313,33.59999847,1,2.9480192831691467,3.9758333333333336 +61,23.74768066,0,2,1,1,42195,3,12900,68,1,3.2709302325581397,3.5833333333333335 +42,22.50635719,0,1,2,1,42195,2,11248,89,0,3.751333570412518,3.1244444444444444 
+27,23.67722511,1,2,3,2,42195,2,13143,52,1,3.2104542341931066,3.6508333333333334 +22,25.23191071,0,1,1,1,42195,3,12132,50,1,3.4779920870425323,3.3699999999999997 +28,19.44146538,1,2,1,2,42195,4,12960,43,1,3.255787037037037,3.6 +26,29.68460083,0,2,2,1,42195,2,17137,40,1,2.4622162572212174,4.760277777777778 +25,22.14966202,0,2,3,1,42195,4,9152,110,1,4.6104676573426575,2.542222222222222 +47,25.23407936,0,2,2,1,42195,3,11620,62,1,3.6312392426850257,3.2277777777777774 +50,20.24147606,1,2,1,2,42195,4,12211,69,0,3.4554909507820817,3.391944444444445 +34,21.24975014,0,1,2,1,42195,2,11047,75,1,3.8195890286955736,3.068611111111111 +29,23.67722511,0,1,2,2,42195,3,11295,55,1,3.735723771580345,3.1375 +25,24.20903206,1,1,3,1,42195,2,13620,50,1,3.0980176211453743,3.783333333333333 +49,19.57361603,1,2,3,2,42195,2,14304,25,0,2.9498741610738257,3.9733333333333336 +32,22.02581596,0,2,1,1,42195,4,11070,51,1,3.8116531165311653,3.075 +33,25.87862206,1,2,3,3,42195,4,21300,20,0,1.9809859154929577,5.916666666666667 +35,21.38588142,1,2,1,1,42195,4,14340,30,0,2.942468619246862,3.9833333333333334 +38,22.65063095,0,2,2,1,42195,2,12485,67,1,3.379655586704045,3.468055555555556 +32,21.23309135,0,2,3,2,42195,2,12720,35,0,3.3172169811320753,3.533333333333333 +42,23.61009026,0,2,2,2,42195,3,12660,61,0,3.3329383886255926,3.5166666666666666 +44,24.48979568,0,2,1,2,42195,3,12531,50,1,3.3672492219296144,3.4808333333333334 +33,27.15151405,0,1,3,1,42195,4,19825,48,0,2.1283732660781842,5.506944444444445 +47,23.97780609,0,2,2,2,42195,3,11678,80,1,3.613204315807501,3.243888888888889 +46,31.7002182,0,2,3,1,42195,4,23100,18,0,1.8266233766233766,6.416666666666667 +36,27.54168892,1,2,2,3,42195,4,20520,40,0,2.056286549707602,5.7 +25,20.58569527,0,2,2,1,42195,4,10176,75,1,4.146521226415095,2.8266666666666667 +24,23.97780609,0,2,2,1,42195,4,9214,107,1,4.579444323855003,2.5594444444444444 +40,23.04032135,0,2,1,2,42195,4,14160,40,0,2.979872881355932,3.933333333333333 
+39,21.34480858,1,2,2,2,42195,2,17640,37,0,2.3920068027210886,4.9 +34,24.5955925,1,2,3,1,42195,4,17100,30,0,2.4675438596491226,4.75 +34,23.97780609,1,2,3,1,42195,3,15958,45,0,2.644128336884321,4.432777777777777 +33,21.74523163,0,1,3,1,42195,4,11673,52,0,3.614751991775893,3.2425 +41,19.29499054,1,2,2,1,42195,4,12780,65,1,3.301643192488263,3.55 +35,21.89049911,1,2,3,1,42195,4,14169,57,1,2.977980097395723,3.9358333333333335 +59,24.23761749,0,2,1,1,42195,4,22565,38,0,1.8699313095501884,6.268055555555555 +46,21.5684433,1,2,2,1,42195,4,13100,79,1,3.2209923664122138,3.638888888888889 +58,21.74523163,0,1,2,1,42195,4,12247,100,1,3.445333551073732,3.4019444444444447 +50,23.14814949,0,2,1,2,42195,3,14433,35,0,2.923508626065267,4.009166666666667 +32,22.53944397,1,2,3,1,42195,4,12774,43,1,3.3031939877876937,3.5483333333333333 +45,19.79558945,1,2,3,1,42195,3,12844,60,0,3.2851915291186544,3.5677777777777777 +30,21.91380501,0,3,3,3,42195,2,18720,40,0,2.25400641025641,5.2 +27,23.08238602,1,2,2,3,42195,4,18514,36.5,0,2.27908609700767,5.142777777777778 +30,20.522686,1,2,1,2,42195,2,13180,35,1,3.20144157814871,3.661111111111111 +40,25.04382706,1,2,1,1,42195,2,18000,55,0,2.3441666666666667,5 +35,22.44668961,0,2,1,2,42195,2,11340,20,1,3.7208994708994707,3.15 +30,20.91937065,0,1,2,1,42195,3,9081,87,1,4.646514701024116,2.5225 +31,20.86985588,1,2,3,2,42195,4,11219,60,1,3.7610303948658528,3.116388888888889 +56,25.04382706,0,2,3,1,42195,3,17110,48,1,2.4661016949152543,4.752777777777778 +26,24.17777824,0,2,1,1,42195,2,14332,42,0,2.9441110801004746,3.9811111111111113 +47,25.96857071,0,2,1,2,42195,4,12713,60,1,3.3190434987807755,3.531388888888889 +45,24.81470108,0,2,2,2,42195,4,12212,40,0,3.4552079921388796,3.3922222222222222 +52,26.71614075,0,2,3,2,42195,2,16055,40,0,2.6281532232949236,4.459722222222222 +45,24.17159081,0,1,1,2,42195,2,15360,52,0,2.7470703125,4.266666666666667 +35,24.56541252,0,1,2,2,42195,3,10640,60,1,3.9656954887218046,2.9555555555555557 
+31,22.0385685,1,1,1,3,42195,4,12775,51,1,3.30293542074364,3.548611111111111 +33,29.68460083,0,2,2,2,42195,3,14872,22,0,2.83721086605702,4.131111111111111 +31,25.65335846,1,1,1,2,42195,3,15613,55,0,2.7025555626721323,4.336944444444444 +50,25.04382706,0,2,1,2,42195,3,11961,47,1,3.527715073990469,3.3225 +39,22.51969719,0,2,1,2,42195,4,15420,40,0,2.7363813229571985,4.283333333333333 +39,21.66193008,1,2,3,1,42195,3,14660,50,0,2.8782401091405183,4.072222222222222 +43,24.55106735,0,2,2,1,42195,2,12121,70,1,3.4811484200973517,3.3669444444444445 +37,19.24729347,1,2,2,1,42195,3,14820,100,1,2.847165991902834,4.116666666666666 +38,28.81798363,1,2,3,1,42195,2,16449,60,1,2.565201532008025,4.569166666666666 +35,20.58569527,0,2,3,1,42195,4,12480,56,1,3.3810096153846154,3.466666666666667 +34,20.73756218,1,2,3,1,42195,2,13693,50,0,3.0815014971153145,3.803611111111111 +28,21.74523163,0,2,3,2,42195,3,16513,26,0,2.555259492521044,4.586944444444444 +28,20.86985588,1,2,3,1,42195,4,15627,45,0,2.7001343827990016,4.340833333333333 +48,25.67717934,0,2,3,1,42195,2,16140,20,0,2.6143122676579926,4.483333333333333 +23,20.48237419,0,2,1,1,42195,2,10781,55,0,3.9138298859103977,2.9947222222222223 +51,25.74573708,1,2,1,1,42195,3,16578,40,0,2.5452406804198335,4.605 +33,25.92195129,0,1,3,2,42195,3,12989,38,1,3.2485179767495573,3.608055555555555 +28,22.89282227,1,2,1,1,42195,3,12664,55,1,3.3318856601389766,3.517777777777778 +39,23.00785828,0,2,1,1,42195,2,11065,70,1,3.8133755083596927,3.073611111111111 +38,25.97402573,0,2,1,1,42195,4,13201,55,1,3.1963487614574655,3.666944444444445 +42,24.6258564,0,2,1,1,42195,2,15009,57,0,2.8113132120727564,4.1691666666666665 +41,20.03506279,1,2,3,1,42195,3,13458,57,0,3.135309852875613,3.7383333333333337 +27,25.23191071,0,1,3,1,42195,2,12267,80,0,3.4397163120567376,3.4074999999999998 +42,21.87164688,1,2,3,1,42195,2,12399,65,1,3.4030970239535447,3.444166666666667 +29,24.1301918,0,2,2,1,42195,4,12179,52,0,3.4645701617538385,3.3830555555555555 
+44,21.69139481,1,2,2,1,42195,3,13609,40,1,3.1005217135719008,3.7802777777777776 +31,23.00573349,1,2,2,1,42195,2,12817,56,1,3.2921120386986034,3.560277777777778 +22,21.5213356,0,2,1,1,42195,3,11841,50,0,3.563465923486192,3.2891666666666666 +28,23.74768066,1,2,2,3,42195,4,22623,40,0,1.865137249701631,6.284166666666667 +55,26.7323513,0,2,3,2,42195,3,13924,42,0,3.030379201378914,3.8677777777777775 +28,20.24406624,1,2,1,1,42195,3,12851,60,1,3.2834020698778303,3.5697222222222225 +29,21.87164688,1,2,3,2,42195,4,17574,40,0,2.400990099009901,4.881666666666666 +28,21.62211609,1,2,3,2,42195,2,12180,50,1,3.4642857142857144,3.3833333333333333 +27,24.81470108,0,1,3,2,42195,3,14442,31,1,2.9216867469879517,4.011666666666667 +35,25.05203247,0,2,3,2,42195,4,14399,55,1,2.9304118341551497,3.999722222222222 +40,22.176784519999998,0,2,3,1,42195,3,9600,87,1,4.3953125,2.6666666666666665 +33,24.38188744,0,2,3,1,42195,3,12900,45,1,3.2709302325581397,3.5833333333333335 +32,22.51829529,0,2,1,1,42195,4,10139,100,0,4.16165302298057,2.8163888888888886 +39,19.66026878,1,2,2,1,42195,4,14940,58,0,2.82429718875502,4.15 +45,23.84960938,0,2,1,1,42195,3,12510,40,0,3.3729016786570742,3.475 +27,22.3776226,1,2,3,1,42195,3,13496,60,1,3.1264819205690575,3.748888888888889 +41,25.79427338,1,2,2,1,42195,3,14060,55,0,3.0010668563300142,3.905555555555556 +30,21.91380501,0,2,1,1,42195,3,12029,54,0,3.5077728822013468,3.3413888888888885 +28,23.94907188,0,2,2,2,42195,3,12289,34,1,3.433558466921637,3.413611111111111 +46,25.85858536,0,2,3,2,42195,2,16638,37,0,2.5360620266858995,4.621666666666667 +43,26.69023323,0,2,3,3,42195,4,20100,35,0,2.0992537313432837,5.583333333333333 +39,23.27121544,1,2,1,1,42195,2,19920,40,1,2.118222891566265,5.533333333333333 +24,20.86985588,1,2,2,2,42195,4,14400,40,0,2.9302083333333333,4 +31,20.15620995,1,1,1,3,42195,3,11205,42,0,3.7657295850066936,3.1125 +30,23.37472534,0,2,2,3,42195,4,14136,45,1,2.984932088285229,3.9266666666666667 
+41,28.44444466,0,2,3,1,42195,4,14100,50,0,2.9925531914893617,3.9166666666666665 +42,23.00573349,1,2,3,1,42195,3,14820,48,0,2.847165991902834,4.116666666666666 +42,21.23312569,0,2,2,1,42195,3,10850,45,0,3.8889400921658988,3.013888888888889 +40,25.11189079,0,2,1,3,42195,4,16080,55,1,2.6240671641791047,4.466666666666667 +47,24.70283508,0,2,1,3,42195,3,12720,70,1,3.3172169811320753,3.533333333333333 +30,22.01950264,1,2,3,2,42195,3,18585,30,1,2.2703793381759483,5.1625 +30,23.74768066,0,1,3,1,42195,4,9278,112,1,4.547855141194223,2.5772222222222223 +33,25.11223602,0,3,1,1,42195,2,12555,55,1,3.3608124253285543,3.4875 +28,26.7323513,0,2,1,1,42195,4,12536,48,0,3.365906190172304,3.482222222222222 +30,24.41077614,0,2,2,2,42195,2,10517,63,1,4.01207568698298,2.921388888888889 +41,22.80591202,0,1,3,1,42195,3,10065,55,1,4.192250372578242,2.7958333333333334 +28,20.77414703,1,3,1,1,42195,4,11190,91,1,3.7707774798927614,3.1083333333333334 +30,19.17613602,1,1,1,3,42195,2,13289,55,1,3.1751824817518246,3.6913888888888886 +44,24.44100571,0,2,1,1,42195,4,9518,108,1,4.433179239335995,2.6438888888888887 +41,19.47665596,0,1,2,2,42195,2,10579,61,0,3.9885622459589753,2.938611111111111 +37,24.48979568,0,2,1,1,42195,3,13027,60,1,3.2390419897136717,3.6186111111111114 +28,22.68170738,0,1,2,1,42195,4,9615,88,1,4.388455538221529,2.6708333333333334 +51,25.21078491,0,2,3,2,42195,3,14247,50,1,2.961676142345757,3.9575 +32,25.24751663,0,2,1,2,42195,3,11593,50,1,3.6396963684982317,3.220277777777778 +41,20.73756218,0,2,1,2,42195,3,9807,95,0,4.302539002753136,2.7241666666666666 +31,22.60793114,0,2,1,1,42195,4,11426,60,1,3.6928934010152283,3.173888888888889 +41,27.45825768,0,2,1,2,42195,2,13400,22,1,3.148880597014925,3.7222222222222223 +40,20.62209892,1,2,2,1,42195,2,12549,105,1,3.3624193162801816,3.4858333333333333 +31,47.18464661,1,2,2,1,42195,3,22680,55,0,1.8604497354497354,6.3 +28,20.20103264,0,2,2,1,42195,4,9483,92,1,4.4495412844036695,2.6341666666666668 
+37,24.04452515,0,2,3,1,42195,2,10480,60,0,4.026240458015267,2.911111111111111 +35,23.66053009,0,2,3,2,42195,2,10467,65,0,4.031241043278876,2.9074999999999998 +32,19.34570503,0,1,1,1,42195,4,8152,110,1,5.176030421982335,2.2644444444444445 +30,23.74768066,0,2,3,2,42195,3,10136,90,1,4.162884767166535,2.8155555555555556 +49,19.79558945,1,2,2,1,42195,2,12858,47,1,3.281614559029398,3.5716666666666668 +44,22.32523155,1,1,2,1,42195,2,16973,43,1,2.4860071878866434,4.714722222222222 +54,23.00556564,0,2,3,1,42195,3,11580,70,0,3.643782383419689,3.216666666666667 +30,20.47117615,0,2,2,2,42195,4,8815,90,1,4.7867271695972775,2.448611111111111 +52,22.14966202,0,2,2,1,42195,2,11541,56,1,3.6560956589550297,3.205833333333333 +28,22.26345062,0,1,2,2,42195,2,12150,65,1,3.4728395061728397,3.375 +31,24.52615929,1,2,1,1,42195,4,13879,55,0,3.0402046256934936,3.855277777777778 +45,21.0636425,0,2,2,1,42195,2,11807,90,1,3.5737274498179046,3.279722222222222 +46,20.65626717,1,2,2,1,42195,2,12600,60,1,3.348809523809524,3.5 +29,23.08238602,1,2,2,1,42195,3,13162,60,1,3.2058197842273213,3.656111111111111 +39,23.80480194,0,2,1,1,42195,2,11606,50,0,3.6356195071514734,3.223888888888889 +44,24.56541252,0,2,1,1,42195,2,9300,65,1,4.537096774193548,2.5833333333333335 +46,34.39236832,1,2,3,1,42195,4,12423,62,1,3.396522579087177,3.4508333333333336 +33,25.52073479,0,2,1,3,42195,2,20714,15,0,2.037028096939268,5.7538888888888895 +30,23.37472534,0,2,1,1,42195,4,11332,59,0,3.7235262972114365,3.147777777777778 +37,21.83592224,0,2,1,1,42195,4,14640,45,1,2.882172131147541,4.066666666666666 +31,32.5569725,1,2,1,1,42195,2,18600,32,0,2.268548387096774,5.166666666666667 +32,26.11717224,0,1,3,2,42195,4,8950,36,0,4.714525139664804,2.486111111111111 +34,25.74573708,1,2,3,2,42195,2,18005,45,0,2.3435156900860874,5.001388888888888 +38,27.12304497,0,2,3,1,42195,4,17878,42,0,2.3601633292314577,4.96611111111111 +20,21.23309135,0,1,3,3,42195,2,12910,35,0,3.2683965917893105,3.586111111111111 
+23,19.17613602,1,2,2,3,42195,3,15276,45,0,2.762175962293794,4.243333333333333 +25,21.04377174,0,2,2,1,42195,2,11703,51,1,3.605485772878749,3.2508333333333335 +47,24.09060478,0,1,2,1,42195,3,13170,70,0,3.203872437357631,3.658333333333333 +30,27.54821014,0,2,1,1,42195,2,11505,72,1,3.667535853976532,3.1958333333333333 +28,21.17267418,1,1,2,2,42195,2,18600,30,0,2.268548387096774,5.166666666666667 +43,30.55125809,0,2,1,1,42195,2,15120,40,1,2.7906746031746033,4.2 +30,20.41903305,1,2,2,1,42195,2,14760,40,1,2.858739837398374,4.1 +33,24.13549423,0,2,2,1,42195,2,12720,35,1,3.3172169811320753,3.533333333333333 +37,23.84960938,0,2,3,1,42195,3,10980,60,1,3.842896174863388,3.05 +27,21.83592224,0,2,2,1,42195,2,13920,60,1,3.03125,3.8666666666666667 +45,27.22624207,0,2,3,2,42195,2,15600,30,0,2.7048076923076922,4.333333333333333 +32,18.19851494,1,1,3,2,42195,4,12530,60,1,3.367517956903432,3.4805555555555556 +24,21.06158638,1,2,3,1,42195,2,10340,75,1,4.080754352030948,2.8722222222222222 +37,25.96857071,0,2,2,1,42195,4,13860,55,1,3.044372294372294,3.85 +47,21.96660423,0,1,1,1,42195,3,13162,55,0,3.2058197842273213,3.656111111111111 +58,19.20026779,1,2,3,1,42195,3,17421,41,0,2.4220768038574136,4.839166666666667 +39,23.67722511,0,2,1,1,42195,3,12813,40,1,3.293139779911028,3.559166666666667 +50,24.13549423,0,2,3,3,42195,4,17040,46,0,2.476232394366197,4.733333333333333 +44,22.20382309,1,2,2,1,42195,2,16209,50,1,2.603183416620396,4.5024999999999995 +30,22.01950264,0,1,1,1,42195,4,14115,40,0,2.989373007438895,3.9208333333333334 +42,24.34137344,0,2,3,2,42195,4,13200,40,1,3.1965909090909093,3.6666666666666665 +46,21.22448921,0,1,3,2,42195,2,10176,55,1,4.146521226415095,2.8266666666666667 +26,23.27775955,0,2,3,1,42195,2,9904,76,1,4.2603998384491115,2.751111111111111 +34,21.38588142,0,2,3,2,42195,3,13480,50,0,3.130192878338279,3.7444444444444445 +31,21.24975014,0,1,1,3,42195,2,11175,30,0,3.7758389261744965,3.1041666666666665 
+51,25.96857071,0,2,2,2,42195,2,13617,53,0,3.098700154218991,3.7824999999999998 +32,29.38775444,0,2,1,1,42195,4,15077,45,0,2.798633680440406,4.188055555555556 +37,36.75645447,0,2,2,1,42195,3,21046,22,0,2.0048940416231114,5.846111111111111 +31,23.08344269,1,1,3,1,42195,3,15070,40,0,2.7999336429993362,4.186111111111111 +54,21.38588142,0,1,1,1,42195,4,18900,32,0,2.2325396825396826,5.25 +50,26.7323513,0,2,3,2,42195,4,13444,65,1,3.1385748289199644,3.7344444444444442 +33,21.06158638,1,2,2,1,42195,3,12481,50,1,3.3807387228587453,3.4669444444444446 +45,25.81369209,0,2,3,1,42195,4,12749,63,1,3.309671346772296,3.5413888888888887 +45,23.45083618,0,1,3,1,42195,4,10963,65,1,3.8488552403539176,3.0452777777777778 +34,26.42340279,0,2,3,2,42195,3,11790,68,1,3.578880407124682,3.275 +56,22.41187286,0,2,2,1,42195,3,10906,80,1,3.8689712085090777,3.0294444444444446 +37,25.92195129,1,2,2,3,42195,3,17281,40,0,2.441698975753718,4.800277777777778 +40,28.13294601,0,2,3,1,42195,3,14355,70,1,2.9393939393939394,3.9875 +64,21.2037735,1,2,2,1,42195,2,15000,17,0,2.813,4.166666666666667 +49,26.61252975,0,2,1,1,42195,2,15587,51,1,2.707063578623212,4.329722222222222 +24,21.51694489,1,2,2,1,42195,3,12830,50,0,3.2887763055339048,3.563888888888889 +55,26.6554451,0,1,3,3,42195,2,16410,41,0,2.5712979890310788,4.558333333333334 +35,31.56565857,0,2,1,3,42195,2,16500,65,1,2.557272727272727,4.583333333333333 +37,25.68181801,0,2,2,3,42195,4,20100,35,0,2.0992537313432837,5.583333333333333 +32,23.67722511,0,1,2,1,42195,4,13162,46,1,3.2058197842273213,3.656111111111111 +30,22.68170738,1,2,1,1,42195,3,12753,51,1,3.308633262761703,3.5425 +40,24.93506622,0,2,2,1,42195,3,13405,65,0,3.147706079820962,3.723611111111111 +33,26.37486076,0,2,3,1,42195,2,14822,45,0,2.8467818108217515,4.117222222222222 +26,25.85858536,0,1,2,2,42195,2,16506,45,1,2.5563431479462015,4.585 +49,22.68170738,1,2,1,2,42195,3,18420,33,0,2.29071661237785,5.116666666666666 +34,21.17267418,0,1,2,2,42195,3,10530,72,1,4.007122507122507,2.925 
+47,21.39037323,1,2,3,1,42195,2,15000,40,1,2.813,4.166666666666667 +27,27.52437782,0,1,1,2,42195,4,12426,45,0,3.395702559150169,3.4516666666666667 +35,25.24751663,1,2,1,3,42195,3,13079,52,1,3.2261640798226163,3.6330555555555555 +46,23.84960938,0,2,2,1,42195,2,11913,50,0,3.5419289851422815,3.309166666666667 +55,24.0420742,0,2,2,1,42195,3,12741,70,1,3.3117494702142687,3.5391666666666666 +38,24.85058212,1,2,2,2,42195,4,13210,65,0,3.1941710825132477,3.6694444444444443 +60,32.40740967,0,2,1,1,42195,3,18730,50,0,2.252802989855846,5.202777777777778 +36,23.45410728,1,1,1,2,42195,3,17077,30,0,2.470867248345728,4.743611111111111 +53,21.70465088,1,2,2,1,42195,3,16500,50,1,2.557272727272727,4.583333333333333 +32,24.64646339,1,2,3,1,42195,4,17149,40,0,2.460493323225844,4.7636111111111115 +32,23.82998085,1,2,3,3,42195,3,14379,28,1,2.934487794700605,3.9941666666666666 +51,26.56771851,0,2,3,1,42195,3,14745,43,0,2.861648016276704,4.095833333333333 +41,24.54295158,0,1,1,1,42195,3,9960,100,1,4.23644578313253,2.7666666666666666 +49,20.89957428,1,2,3,2,42195,3,12363,67,1,3.4130065518078134,3.434166666666667 +55,24.37873268,0,2,1,2,42195,3,12180,33,0,3.4642857142857144,3.3833333333333333 +34,24.23761749,0,2,2,2,42195,4,15610,44,0,2.7030749519538757,4.336111111111111 +47,21.38588142,0,2,2,2,42195,3,9827,60,1,4.293782436145314,2.729722222222222 +46,24.69135857,0,2,1,3,42195,3,17820,40,0,2.3678451178451176,4.95 +26,23.37423897,0,2,2,1,42195,4,14820,30,0,2.847165991902834,4.116666666666666 +43,23.06619835,0,1,3,1,42195,3,13105,73,0,3.2197634490652423,3.6402777777777775 +26,21.96660423,0,2,3,1,42195,3,11160,40,1,3.7809139784946235,3.1 +28,23.45410728,1,2,2,1,42195,4,13396,50,0,3.1498208420424008,3.7211111111111115 +48,20.25152397,1,2,1,1,42195,3,15840,45,1,2.663825757575758,4.4 +53,22.68170738,0,2,2,1,42195,3,11507,64,0,3.666898409663683,3.196388888888889 +45,23.2189579,0,2,2,1,42195,4,11470,50,0,3.678727114210985,3.186111111111111 
+40,23.61009026,0,2,2,1,42195,4,10627,85,1,3.9705467206172957,2.9519444444444445 +59,23.24191475,0,2,2,1,42195,3,14785,56,1,2.8539059857964153,4.106944444444444 +24,21.98859215,1,1,3,1,42195,4,15284,20,0,2.760730175346768,4.245555555555556 +46,25.05050468,1,2,2,1,42195,4,13720,55,1,3.0754373177842567,3.811111111111111 +55,21.30681801,1,1,1,2,42195,2,13506,54,1,3.124167036872501,3.7516666666666665 +33,22.68170738,1,2,3,3,42195,2,19307,32,1,2.1854767700833895,5.363055555555556 +31,20.03506279,0,2,1,1,42195,3,9414,110,1,4.482154238368388,2.615 +40,27.15151405,0,2,2,1,42195,2,17623,21,1,2.3943142484253532,4.895277777777777 +36,23.67722511,0,1,3,1,42195,3,14712,20.60000038,1,2.8680668841761827,4.086666666666667 +38,20.63082695,0,2,2,1,42195,3,13142,52,1,3.2106985238167707,3.6505555555555556 +41,23.79261398,0,2,2,1,42195,4,9801,80,1,4.305172941536578,2.7224999999999997 +47,23.49176788,0,2,3,1,42195,4,10588,55,0,3.9851718927087267,2.9411111111111112 +49,26.27408218,0,2,2,1,42195,4,12516,44,0,3.3712847555129435,3.4766666666666666 +27,20.65626717,1,2,3,1,42195,3,12545,40,0,3.363491430848944,3.4847222222222225 +45,25.96857071,0,2,3,1,42195,3,13973,30,0,3.0197523795892076,3.8813888888888886 +36,20.03506279,1,2,3,2,42195,2,15722,60,1,2.683818852563287,4.367222222222223 +39,24.44100571,0,1,1,2,42195,3,13565,33,1,3.110578695171397,3.7680555555555557 +27,22.51829529,0,2,1,2,42195,4,10846,61,1,3.890374331550802,3.012777777777778 +47,25.92195129,0,2,3,1,42195,2,14045,65,0,3.0042719829120683,3.901388888888889 +33,22.86756706,0,2,3,1,42195,2,15294,43,1,2.7589250686543743,4.248333333333333 +31,26.93602562,1,1,3,1,42195,3,14109,45,0,2.990644269615139,3.919166666666667 +46,21.04377174,0,2,2,1,42195,2,11492,60,1,3.6716846501914375,3.192222222222222 +42,29.09090996,0,2,1,1,42195,2,12613,25,0,3.3453579640053914,3.5036111111111112 +49,24.48979568,0,2,2,1,42195,3,13080,100,0,3.2259174311926606,3.6333333333333333 +31,22.26345062,0,2,3,2,42195,3,11090,70,1,3.8047790802524797,3.0805555555555557 
+40,25.24751663,0,2,3,3,42195,2,15600,60,1,2.7048076923076922,4.333333333333333 +48,25.81369209,0,2,2,2,42195,3,13835,56,1,3.049873509215757,3.843055555555556 +30,21.38588142,1,2,2,1,42195,4,17970,25,0,2.3480801335559267,4.991666666666666 +67,28.05836296,0,2,3,1,42195,2,21600,40,0,1.9534722222222223,6 +33,21.640728,1,1,1,1,42195,2,14043,35,1,3.0046998504593034,3.9008333333333334 +35,20.31658554,0,2,3,3,42195,3,10424,63,1,4.047870299309286,2.895555555555555 +42,24.04452515,0,2,1,1,42195,3,13680,100,0,3.0844298245614037,3.8 +51,32.36148453,1,2,1,2,42195,3,22333,42,0,1.8893565575605606,6.203611111111111 +37,23.88304138,0,1,1,1,42195,3,11987,40,1,3.5200634020188537,3.3297222222222222 +33,18.32382584,1,2,3,1,42195,4,11966,50,0,3.5262410162126026,3.323888888888889 +25,21.5213356,0,1,3,1,42195,2,12300,70,1,3.430487804878049,3.4166666666666665 +54,21.87164688,1,2,2,1,42195,4,14306,51,1,2.9494617642947016,3.973888888888889 +34,25.74573708,0,2,1,1,42195,4,13185,45,0,3.200227531285552,3.6625 +39,22.36208534,0,1,1,1,42195,3,10922,54,0,3.863303424281267,3.033888888888889 +32,22.72727203,1,2,3,1,42195,4,14700,55,1,2.870408163265306,4.083333333333333 +45,21.97979736,0,2,3,2,42195,4,11017,70,0,3.829990015430698,3.060277777777778 +34,23.5923233,1,2,2,2,42195,3,16151,30,0,2.612531731781314,4.4863888888888885 +37,24.1301918,0,2,1,1,42195,3,14096,30,0,2.9934023836549377,3.9155555555555557 +49,19.53418541,1,1,3,1,42195,2,13767,76,1,3.0649378949662234,3.8241666666666663 +70,24.30183029,0,2,2,1,42195,2,17706,42,1,2.383090477804134,4.918333333333334 +38,22.53944397,1,2,2,1,42195,3,15480,30,0,2.72577519379845,4.3 +32,23.14814949,0,2,1,1,42195,3,12540,45,0,3.3648325358851676,3.4833333333333334 +36,25.25252724,0,1,3,1,42195,2,15060,28,0,2.801792828685259,4.183333333333334 +38,22.91344261,0,2,1,1,42195,4,14040,37,1,3.0053418803418803,3.9 +33,22.72154427,1,2,3,1,42195,3,16877,35,0,2.50014813059193,4.688055555555556 
+35,18.84972191,1,2,2,1,42195,3,11002,75,1,3.8352117796764227,3.0561111111111114 +45,27.5420742,0,2,2,1,42195,2,15720,50,1,2.6841603053435112,4.366666666666666 +39,22.36208534,0,2,2,2,42195,4,11708,20,1,3.6039460198155107,3.252222222222222 +33,23.49176788,1,2,3,2,42195,2,11580,50,1,3.643782383419689,3.216666666666667 +35,22.70362473,1,1,3,1,42195,3,15028,50,1,2.8077588501463935,4.174444444444444 +41,24.27048302,0,2,3,1,42195,4,11604,80,1,3.636246122026887,3.2233333333333336 +47,23.20066833,0,2,2,2,42195,3,12840,3,1,3.286214953271028,3.566666666666667 +43,20.86985588,1,1,2,2,42195,2,17927,38.5,1,2.353712277570146,4.979722222222223 +43,22.80591202,0,2,2,1,42195,4,11510,55,1,3.6659426585577757,3.1972222222222224 +44,31.16883087,0,2,3,1,42195,3,14942,45,1,2.8239191540623745,4.150555555555555 +35,22.01950264,1,2,1,2,42195,4,13090,38,1,3.2234530175706646,3.636111111111111 +38,19.24001694,1,2,1,1,42195,4,14107,55,1,2.99106826398242,3.9186111111111113 +31,23.00556564,0,2,2,2,42195,2,14314,55,1,2.9478133296073774,3.976111111111111 +23,21.51694489,1,2,3,1,42195,4,10991,73,1,3.8390501319261214,3.0530555555555554 +35,19.64085388,0,2,2,1,42195,3,13111,52,1,3.2182899855083518,3.641944444444445 +34,20.89263725,1,1,2,1,42195,4,11880,45,0,3.551767676767677,3.3 +32,21.87164688,1,2,2,2,42195,2,12700,70,1,3.32244094488189,3.5277777777777777 +46,25.21212006,0,2,2,2,42195,2,13320,42,1,3.1677927927927927,3.7 +27,21.06158638,0,2,3,3,42195,4,10905,55,0,3.869325997248968,3.029166666666667 +24,21.74523163,0,2,2,3,42195,2,11520,45,1,3.6627604166666665,3.2 +43,20.63079834,0,1,1,1,42195,3,11363,48,1,3.713367948605122,3.156388888888889 +34,23.67722511,1,2,2,1,42195,3,19516,40,1,2.1620721459315435,5.421111111111111 +30,26.71614075,0,2,3,1,42195,4,11620,30,0,3.6312392426850257,3.2277777777777774 +38,23.5923233,0,2,3,2,42195,3,12120,55,0,3.4814356435643563,3.3666666666666667 +51,23.84960938,0,1,3,1,42195,4,12648,58,1,3.336100569259962,3.5133333333333336 
+46,23.74768066,0,1,2,2,42195,4,12930,80,1,3.2633410672853826,3.591666666666667 +32,22.80591202,1,2,1,2,42195,3,15473,62,1,2.7270083371033413,4.298055555555555 +54,29.26304436,1,2,2,1,42195,2,15840,54,0,2.663825757575758,4.4 +24,19.47665596,0,2,2,1,42195,3,12432,45,1,3.3940637065637067,3.453333333333333 +69,23.06619835,0,2,1,2,42195,2,14460,45,1,2.91804979253112,4.016666666666667 +40,22.35768318,1,2,1,2,42195,3,15922,55,1,2.650106770506218,4.422777777777778 +52,25.32242584,0,2,2,1,42195,2,14275,43,0,2.955866900175131,3.9652777777777777 +36,20.91937065,0,2,3,1,42195,3,9398,90,1,4.489785060651203,2.6105555555555555 +48,25.24778557,0,2,2,1,42195,4,12059,55,1,3.499046355419189,3.349722222222222 +45,24.55106735,0,2,2,1,42195,2,13392,42,0,3.1507616487455197,3.7199999999999998 +28,20.86985588,1,1,1,1,42195,3,14768,40,0,2.857191224268689,4.102222222222222 +49,22.3776226,1,1,1,1,42195,2,12275,74,1,3.4374745417515276,3.4097222222222223 +40,20.03710556,0,1,1,1,42195,3,10600,80,0,3.9806603773584905,2.944444444444444 +56,23.52446938,0,1,3,1,42195,3,12663,65,1,3.332148779909974,3.5175 +25,21.66193008,1,2,3,1,42195,2,12247,50,0,3.445333551073732,3.4019444444444447 +35,20.65626717,1,2,2,3,42195,3,14465,40,0,2.9170411337711717,4.018055555555556 +39,22.35768318,0,2,3,2,42195,2,11334,55,1,3.722869242985707,3.1483333333333334 +38,23.14814949,0,2,1,2,42195,3,11780,50,1,3.581918505942275,3.2722222222222226 +29,20.13986015,1,2,1,2,42195,3,13413,45,1,3.145828673674793,3.7258333333333336 +42,25.97402573,0,2,3,3,42195,3,16500,35,0,2.557272727272727,4.583333333333333 +44,22.33406639,0,2,2,2,42195,2,10685,70,1,3.9489939167056622,2.968055555555556 +37,22.56029701,0,1,2,1,42195,3,13502,45,1,3.1250925788772035,3.7505555555555556 +29,24.48979568,0,2,2,1,42195,3,11011,55,1,3.8320770138951956,3.0586111111111114 +44,24.20903206,0,1,2,1,42195,4,10913,43,1,3.8664895079263264,3.031388888888889 +33,21.51694489,1,1,3,2,42195,4,12510,55,1,3.3729016786570742,3.475 
+27,19.44146538,0,2,1,2,42195,3,9540,90,1,4.422955974842767,2.65 +35,20.73756218,1,2,3,1,42195,2,12496,55,1,3.376680537772087,3.4711111111111115 +43,25.81369209,0,2,1,1,42195,3,13989,40,1,3.016298520265923,3.8858333333333333 +40,17.84652138,0,2,3,1,42195,4,12518,60,1,3.370746125579166,3.477222222222222 +38,25.24751663,0,2,3,1,42195,3,12510,55,1,3.3729016786570742,3.475 +28,22.68170738,1,2,3,1,42195,2,12510,65,1,3.3729016786570742,3.475 +37,22.80591202,0,2,3,1,42195,3,10920,50,1,3.864010989010989,3.033333333333333 +36,21.51694489,1,2,2,1,42195,3,11400,35,0,3.7013157894736843,3.1666666666666665 +37,27.20981216,0,1,3,2,42195,2,12424,50,0,3.396249195106246,3.451111111111111 +44,22.57785988,0,2,3,1,42195,2,12823,50,1,3.2905716291039537,3.5619444444444444 +43,22.60793114,0,2,3,1,42195,2,16160,20,0,2.611076732673267,4.488888888888889 +35,19.27918243,1,1,3,1,42195,3,11940,65,1,3.5339195979899496,3.316666666666667 +22,21.87164688,1,2,1,1,42195,4,12065,62,1,3.4973062577704104,3.351388888888889 +53,23.37171364,0,2,1,1,42195,2,12430,75,1,3.3946098149637973,3.4527777777777775 +52,21.03681564,1,2,2,2,42195,4,12578,57,1,3.354666878677055,3.493888888888889 +36,22.14966202,1,1,3,3,42195,4,14040,45,0,3.0053418803418803,3.9 +44,23.49176788,0,2,3,2,42195,2,11524,21,0,3.661489066296425,3.201111111111111 +40,22.86775398,1,2,2,1,42195,2,16440,36,1,2.5666058394160585,4.566666666666666 +30,21.37924576,1,1,3,1,42195,2,12180,63,1,3.4642857142857144,3.3833333333333333 +45,21.38588142,0,1,2,1,42195,3,10875,82,1,3.88,3.0208333333333335 +34,22.51829529,0,2,2,1,42195,2,9533,85,1,4.426203713416553,2.6480555555555556 +29,21.38588142,0,1,1,3,42195,2,9877,90,1,4.272046167864736,2.7436111111111114 +39,21.37291145,0,2,1,2,42195,3,9601,130,1,4.3948547026351426,2.666944444444445 +46,20.41903305,1,2,3,1,42195,4,12837,84,1,3.286982939939238,3.565833333333333 +34,21.87164688,1,1,1,2,42195,3,12255,46,0,3.4430844553243576,3.404166666666667 
+34,25.66305733,0,2,1,2,42195,3,14820,35,0,2.847165991902834,4.116666666666666 +38,27.41159058,0,2,2,3,42195,4,18026,18,0,2.34078553200932,5.0072222222222225 +32,24.5955925,1,1,3,1,42195,2,12835,58,1,3.287495130502532,3.5652777777777778 +32,25.8121376,0,2,2,2,42195,2,14459,43,0,2.9182516079950203,4.016388888888889 +31,25.75936508,0,2,2,1,42195,4,13025,50,1,3.239539347408829,3.618055555555556 +40,22.26345062,0,1,3,1,42195,3,8657,125,1,4.874090331523623,2.404722222222222 +40,25.20478821,0,1,3,1,42195,3,13585,40,1,3.1059992638940006,3.773611111111111 +32,23.49176788,0,2,3,1,42195,3,13087,50,1,3.2241919462061586,3.635277777777778 +44,22.79490089,0,2,2,1,42195,3,13456,70,1,3.1357758620689653,3.737777777777778 +44,23.5923233,0,2,1,1,42195,4,11985,45,0,3.520650813516896,3.3291666666666666 +28,24.97483635,0,2,2,1,42195,3,11200,67,0,3.7674107142857145,3.111111111111111 +31,23.84960938,0,2,2,3,42195,4,14160,50,0,2.979872881355932,3.933333333333333 +37,21.07580948,1,2,3,2,42195,3,13812,48,1,3.054952215464813,3.8366666666666664 +26,24.73716545,1,1,1,2,42195,2,14668,43,0,2.8766703026997544,4.0744444444444445 +56,25.47970772,0,1,2,2,42195,3,14650,40,0,2.8802047781569966,4.069444444444445 +28,21.70963478,1,2,1,1,42195,2,12153,40,1,3.4719822266107134,3.3758333333333335 +26,23.2189579,0,2,2,1,42195,2,10087,105,1,4.183106969366511,2.8019444444444446 +29,22.0333519,1,2,1,1,42195,2,12352,46,1,3.4160459844559585,3.4311111111111114 +29,22.03732109,0,2,2,1,42195,3,9392,40,1,4.4926533219761495,2.608888888888889 +36,23.00573349,0,2,2,1,42195,2,10473,75,0,4.028931538241191,2.9091666666666667 +32,24.96845627,0,2,3,2,42195,2,13131,50,1,3.213388165410098,3.6475 +34,23.16774559,1,2,1,1,42195,4,14268,44,0,2.957317073170732,3.9633333333333334 +25,21.91380501,0,2,1,1,42195,3,10021,55,1,4.2106576190000995,2.7836111111111115 +48,23.74768066,0,2,3,3,42195,2,15840,26,0,2.663825757575758,4.4 +35,24.04452515,0,2,3,1,42195,3,10861,74,0,3.8850013810882977,3.016944444444445 
+49,20.522686,1,2,2,1,42195,4,13020,65,0,3.2407834101382487,3.6166666666666667 +49,23.27272797,0,3,1,1,42195,3,15873,20,1,2.6582876582876582,4.409166666666667 +29,24.22145271,0,2,1,3,42195,4,12225,60,1,3.4515337423312884,3.3958333333333335 +38,22.26345062,0,1,3,2,42195,3,11220,70,1,3.7606951871657754,3.1166666666666667 +43,24.57002449,0,2,3,1,42195,3,11762,90,1,3.5874001020234654,3.2672222222222222 +42,22.16610718,0,2,3,2,42195,2,10522,65,1,4.010169169359437,2.9227777777777777 +28,28.40193176,1,2,2,1,42195,3,16800,40,0,2.5116071428571427,4.666666666666667 +26,20.03710556,0,1,1,1,42195,3,10822,65,0,3.8990020328959525,3.006111111111111 +27,23.84337807,1,1,2,2,42195,2,11145,83,1,3.7860026917900402,3.095833333333333 +25,20.47117615,0,2,3,2,42195,3,10560,105,1,3.9957386363636362,2.933333333333333 +31,21.03681564,1,2,1,1,42195,2,12500,51,0,3.3756,3.4722222222222223 +32,22.91344261,0,2,1,1,42195,2,10349,64,1,4.077205527104068,2.874722222222222 +35,26.69023323,0,2,3,1,42195,3,15000,50,1,2.813,4.166666666666667 +31,21.07240105,1,2,3,1,42195,4,12500,60,1,3.3756,3.4722222222222223 +38,28.11374855,0,2,1,1,42195,3,16280,55,0,2.591830466830467,4.522222222222222 +46,27.03786278,0,2,2,1,42195,3,14378,40,1,2.934691890388093,3.993888888888889 +28,22.3776226,1,2,3,1,42195,4,11835,55,1,3.565272496831432,3.2875 +66,20.76318932,0,2,1,1,42195,3,18930,60,1,2.229001584786054,5.258333333333334 +44,23.42873192,0,2,2,1,42195,4,15310,49,0,2.756041802743305,4.252777777777777 +24,20.19801331,0,2,1,1,42195,4,9130,86,1,4.62157721796276,2.536111111111111 +30,23.97016907,1,2,2,3,42195,3,17827,25,0,2.36691535311606,4.951944444444445 +28,21.640728,0,2,2,2,42195,4,8242,130,1,5.11950982771172,2.2894444444444444 +30,23.5923233,1,2,1,1,42195,4,15496,70,1,2.7229607640681466,4.304444444444444 +33,23.93687057,0,2,1,2,42195,4,8797,104,1,4.79652154143458,2.443611111111111 +31,23.24191475,0,2,3,1,42195,2,14460,50,1,2.91804979253112,4.016666666666667 
+28,24.85058212,0,1,2,1,42195,3,14161,50,1,2.979662453216581,3.9336111111111114 +36,23.5923233,0,1,1,2,42195,4,10050,70,1,4.1985074626865675,2.7916666666666665 +55,22.56029701,0,2,1,1,42195,2,11940,60,1,3.5339195979899496,3.316666666666667 +38,23.90596962,0,2,1,3,42195,2,13468,55,0,3.132981882981883,3.741111111111111 +49,23.84960938,0,2,3,1,42195,3,11998,44,1,3.5168361393565593,3.332777777777778 +29,25.74573708,1,2,2,1,42195,3,20430,33,0,2.065345080763583,5.675 +35,22.72189331,1,2,3,2,42195,2,13072,70,1,3.227891676866585,3.631111111111111 +40,24.30183029,1,2,3,2,42195,4,14750,55,1,2.860677966101695,4.097222222222222 +23,17.7299614,0,2,2,2,42195,3,9780,50,0,4.314417177914111,2.716666666666667 +36,22.0385685,0,2,1,1,42195,2,11395,97,1,3.702939885914875,3.1652777777777774 +36,23.03030205,1,1,1,1,42195,2,12291,54,1,3.432999755918965,3.4141666666666666 +49,23.3258419,0,1,3,1,42195,4,10701,62,0,3.9430894308943087,2.9724999999999997 +43,24.3044014,0,2,2,1,42195,3,11593,65,1,3.6396963684982317,3.220277777777778 +36,32.05936813,0,1,1,3,42195,3,15600,45,1,2.7048076923076922,4.333333333333333 +34,23.2491684,0,1,1,2,42195,3,13320,50,0,3.1677927927927927,3.7 +30,22.57785988,0,1,2,1,42195,3,11040,100,1,3.8220108695652173,3.066666666666667 +35,22.01950264,1,1,1,1,42195,3,12546,55,1,3.363223338115734,3.485 +28,23.08344269,0,1,3,1,42195,3,13320,46,0,3.1677927927927927,3.7 +30,20.03506279,1,2,1,3,42195,2,12996,45,0,3.246768236380425,3.61 +39,21.38588142,0,1,1,2,42195,4,11462,70,1,3.6812947129645788,3.1838888888888888 +31,22.68170738,1,2,1,1,42195,2,12204,60,0,3.457472959685349,3.39 +46,19.64085388,0,1,1,1,42195,3,9838,70,1,4.28898150030494,2.7327777777777778 +48,20.20202065,0,1,3,1,42195,3,10020,70,1,4.211077844311378,2.783333333333333 +48,20.76124382,1,2,1,1,42195,2,12072,55,1,3.4952783300198806,3.353333333333333 +32,25.89813232,0,2,2,1,42195,3,12075,57,1,3.4944099378881988,3.3541666666666665 +45,26.6554451,0,2,2,3,42195,2,15600,29,0,2.7048076923076922,4.333333333333333 
+33,20.92764473,0,2,3,3,42195,3,10719,80,0,3.936467954100196,2.9775 +35,19.67647552,1,2,2,3,42195,2,11160,20,1,3.7809139784946235,3.1 +34,19.44146538,0,2,1,1,42195,3,10485,44,1,4.024320457796852,2.9125 +54,22.33406639,1,1,3,1,42195,3,12649,65,0,3.335836825045458,3.513611111111111 +32,26.16460609,1,1,3,1,42195,3,19200,45,1,2.19765625,5.333333333333333 +42,28.2599144,0,2,3,1,42195,2,21180,40,0,1.9922096317280453,5.883333333333334 +29,31.14186668,0,2,3,1,42195,2,13560,50,0,3.1117256637168142,3.7666666666666666 +37,24.22145271,0,2,1,2,42195,4,10558,65,0,3.996495548399318,2.932777777777778 +44,18.87354088,1,1,1,1,42195,2,10077,104,1,4.187258112533492,2.7991666666666664 +27,21.30681801,1,2,2,2,42195,3,15965,35,0,2.6429689946758534,4.434722222222222 +33,19.88636398,1,2,2,1,42195,2,11341,60,0,3.7205713781853453,3.150277777777778 +41,23.74960518,0,2,3,1,42195,3,13680,50,0,3.0844298245614037,3.8 +29,22.26345062,0,2,2,2,42195,4,10853,65,1,3.8878651064221876,3.0147222222222223 +33,24.20903206,0,2,1,1,42195,2,12060,58,1,3.4987562189054726,3.35 +34,23.04032135,0,2,2,2,42195,4,12197,60,1,3.459457243584488,3.3880555555555554 +30,21.25563812,1,1,1,1,42195,4,13020,40,1,3.2407834101382487,3.6166666666666667 +58,21.04377174,0,2,3,2,42195,2,10936,53,1,3.8583577176298465,3.037777777777778 +31,23.00573349,1,2,3,1,42195,2,12731,35,1,3.3143507972665147,3.536388888888889 +32,20.03710556,0,2,1,1,42195,4,11031,30,0,3.8251291813978785,3.0641666666666665 +42,24.6258564,0,1,3,1,42195,4,11043,58,1,3.8209725618038575,3.0675000000000003 +37,20.62209892,0,2,1,1,42195,4,10512,80,1,4.01398401826484,2.92 +37,22.14966202,0,2,3,1,42195,3,10576,100,1,3.989693645990923,2.937777777777778 +34,24.44100571,0,2,2,3,42195,2,10487,70,1,4.023552970344236,2.9130555555555557 +37,24.52615929,0,2,3,1,42195,3,11262,75,0,3.7466702184336707,3.128333333333333 +40,23.00556564,0,2,3,1,42195,3,14940,48,1,2.82429718875502,4.15 +21,21.24975014,0,2,3,2,42195,4,12300,45,0,3.430487804878049,3.4166666666666665 
+28,23.56902504,0,2,2,2,42195,2,13680,60,0,3.0844298245614037,3.8 +22,25.68033028,0,2,3,1,42195,4,16330,23,0,2.583894672382119,4.536111111111111 +27,25.23191071,1,2,2,2,42195,2,14700,45,0,2.870408163265306,4.083333333333333 +30,23.37423897,1,2,3,1,42195,4,13530,60,0,3.1186252771618626,3.7583333333333333 +29,25.16514397,0,2,2,2,42195,2,12508,60,0,3.3734409977614326,3.4744444444444444 +25,22.51829529,0,1,3,1,42195,3,8447,125,1,4.995264590979046,2.346388888888889 +30,23.91919327,0,1,1,1,42195,4,13392,40,0,3.1507616487455197,3.7199999999999998 +35,20.24147606,1,2,1,1,42195,2,10610,65,0,3.976908576814326,2.9472222222222224 +32,19.37938118,0,3,2,1,42195,3,12158,35,0,3.4705543674946537,3.377222222222222 +35,22.91344261,0,1,2,2,42195,4,11100,80,1,3.8013513513513515,3.0833333333333335 +41,19.74053955,1,2,3,1,42195,3,12713,70,0,3.3190434987807755,3.531388888888889 +29,27.4961338,1,2,1,1,42195,3,18180,30,0,2.320957095709571,5.05 +34,25.25252724,0,2,2,1,42195,4,10550,65,1,3.999526066350711,2.930555555555556 +31,22.31223869,0,2,1,1,42195,4,12084,59,1,3.4918073485600796,3.356666666666667 +41,26.7320137,0,2,1,1,42195,4,11294,64,1,3.736054542234815,3.137222222222222 +29,25.20478821,0,2,2,3,42195,4,13080,40,0,3.2259174311926606,3.6333333333333333 +24,22.36208534,0,2,3,2,42195,4,15042,35,0,2.805145592341444,4.178333333333333 +36,21.38588142,1,1,1,1,42195,2,17310,40,0,2.4376083188908146,4.808333333333334 +35,29.21779823,1,2,3,3,42195,2,19545,26,1,2.1588641596316194,5.429166666666666 +26,21.46464729,0,2,2,1,42195,4,11301,62,0,3.7337403769577913,3.1391666666666667 +60,22.44668961,0,2,1,1,42195,2,13890,40,1,3.037796976241901,3.8583333333333334 +35,22.91344261,0,1,2,2,42195,3,12900,40,1,3.2709302325581397,3.5833333333333335 +41,23.67722511,0,2,1,1,42195,4,11100,60,1,3.8013513513513515,3.0833333333333335 +26,22.19460106,1,2,2,1,42195,3,15600,37,1,2.7048076923076922,4.333333333333333 +49,23.45083618,0,2,3,1,42195,3,11936,50,1,3.535103887399464,3.3155555555555556 
+36,28.40909004,1,2,3,2,42195,3,19827,43,0,2.128158571644727,5.507499999999999 +37,20.55757523,0,1,3,1,42195,4,9468,90,1,4.4565906210392905,2.6300000000000003 +28,22.3776226,1,2,1,2,42195,3,15430,30,1,2.7346079066753077,4.286111111111111 +37,23.92344666,0,1,1,1,42195,3,12660,35,0,3.3329383886255926,3.5166666666666666 +23,22.3776226,1,2,2,1,42195,4,14460,50,0,2.91804979253112,4.016666666666667 +43,23.15398788,0,1,1,1,42195,2,15120,45,1,2.7906746031746033,4.2 +28,20.44668007,0,2,1,1,42195,3,8966,115,0,4.706111978585769,2.4905555555555554 +36,22.16610718,0,2,3,1,42195,4,10752,60,1,3.9243861607142856,2.9866666666666664 +39,21.91380501,0,1,1,1,42195,4,12574,60,0,3.355734054397964,3.4927777777777775 +55,22.51829529,0,2,3,1,42195,2,12752,38,0,3.308892722710163,3.542222222222222 +29,22.22222137,1,2,1,1,42195,2,11276,73,0,3.7420184462575383,3.1322222222222225 +32,18.39954567,0,2,2,3,42195,2,9848,70,1,4.284626320064988,2.7355555555555555 +30,24.20903206,0,3,1,1,42195,3,20591,35,0,2.04919625078918,5.719722222222222 +37,22.90478325,1,2,1,1,42195,2,11627,51,1,3.629053066139159,3.229722222222222 +41,21.86121559,1,2,1,1,42195,3,13322,67,1,3.167317219636691,3.7005555555555554 +33,18.03393936,0,2,3,1,42195,3,11009,65,1,3.832773185575438,3.0580555555555553 +38,33.73579407,1,2,1,1,42195,2,16258,40,1,2.5953376799114283,4.516111111111111 +28,23.29388237,0,2,1,1,42195,4,10839,65,0,3.8928867976750623,3.0108333333333333 +57,25.5287571,0,1,3,1,42195,2,13155,64,1,3.2075256556442415,3.654166666666667 +24,22.3776226,1,2,3,1,42195,2,16000,44,0,2.6371875,4.444444444444445 +45,20.81165504,1,2,1,2,42195,2,13620,40,0,3.0980176211453743,3.783333333333333 +25,23.08344269,0,1,1,2,42195,3,14160,50,1,2.979872881355932,3.933333333333333 +55,20.25152397,0,1,1,2,42195,2,10984,60,0,3.8414967225054624,3.051111111111111 +33,25.96887398,0,2,2,1,42195,3,12510,47,1,3.3729016786570742,3.475 +27,27.5420742,1,2,1,3,42195,2,20967,40,0,2.012448132780083,5.824166666666667 
+37,28.05836296,1,3,1,3,42195,3,19326,40,1,2.183328158956846,5.368333333333334 +47,24.57002449,0,2,3,2,42195,4,12640,55,0,3.3382120253164556,3.511111111111111 +32,24.70283508,0,1,3,3,42195,2,11296,42,1,3.735393059490085,3.137777777777778 +36,20.86985588,1,1,2,3,42195,3,12115,63,1,3.4828724721419726,3.3652777777777776 +32,21.37073326,1,2,1,1,42195,4,12795,74,1,3.2977725674091443,3.5541666666666667 +39,21.86121559,1,2,1,1,42195,2,14400,40,0,2.9302083333333333,4 +60,22.14966202,0,2,3,1,42195,4,14460,56,0,2.91804979253112,4.016666666666667 +24,21.30681801,1,1,1,3,42195,4,13305,40,1,3.1713641488162345,3.6958333333333333 +32,22.3776226,1,2,1,1,42195,3,12971,80,1,3.253025981034616,3.6030555555555557 +47,25.97402573,0,1,1,1,42195,2,12466,55,1,3.384806674153698,3.462777777777778 +51,23.33714104,0,2,1,1,42195,4,14709,55,1,2.8686518458086887,4.085833333333333 +41,26.37949944,1,1,2,1,42195,4,19800,25,0,2.131060606060606,5.5 +50,23.00573349,0,1,2,2,42195,3,10563,62,1,3.9946038057370066,2.934166666666667 +64,24.37873268,0,2,3,1,42195,2,13397,62,0,3.149585728148093,3.721388888888889 +31,18.69939232,1,2,1,1,42195,3,16497,56,0,2.557737770503728,4.5825 +29,23.82097435,1,1,3,2,42195,2,11646,60,1,3.623132405976301,3.235 +51,31.84507942,0,2,2,1,42195,2,16330,30,0,2.583894672382119,4.536111111111111 +39,22.70872116,0,2,1,2,42195,3,12936,51,0,3.26182745825603,3.5933333333333333 +24,23.5923233,0,1,3,3,42195,2,9937,74,1,4.24625138371742,2.760277777777778 +53,23.14814949,0,2,2,1,42195,2,12220,55,1,3.452945990180033,3.3944444444444444 +46,22.86756706,0,2,2,2,42195,4,10309,90,0,4.093025511688816,2.863611111111111 +27,24.09897804,1,1,2,2,42195,3,13860,45,0,3.044372294372294,3.85 +33,21.5213356,1,2,1,1,42195,4,15780,40,0,2.673954372623574,4.383333333333334 +29,24.20903206,1,1,3,1,42195,2,14131,75,1,2.9859882527775814,3.925277777777778 +40,23.00556564,0,2,3,1,42195,3,11100,70,1,3.8013513513513515,3.0833333333333335 +37,19.55280685,0,2,3,1,42195,4,10834,84,1,3.8946834040982092,3.0094444444444446 
+52,18.95542717,1,2,3,1,42195,3,17283,55,0,2.4414164207602846,4.800833333333333 +30,25.11189079,0,1,2,1,42195,2,11280,50,1,3.7406914893617023,3.1333333333333333 +33,20.48413086,1,2,1,1,42195,3,12837,56,0,3.286982939939238,3.565833333333333 +44,26.73796654,0,2,1,2,42195,3,14760,24,0,2.858739837398374,4.1 +33,23.20066833,0,2,3,1,42195,4,10229,87,1,4.12503666047512,2.8413888888888885 +40,20.41903305,1,2,3,1,42195,4,13110,70,0,3.2185354691075516,3.6416666666666666 +38,22.14966202,0,2,1,1,42195,4,10751,60,1,3.924751185936192,2.986388888888889 +38,20.19000053,1,2,2,2,42195,3,11226,64,1,3.7586851950828435,3.118333333333333 +31,22.7912426,0,1,2,1,42195,2,10395,72,1,4.059163059163059,2.8875 +42,31.72218323,1,2,3,2,42195,3,22860,35,0,1.8458005249343832,6.35 +47,23.70931816,0,2,1,3,42195,2,14122,33,0,2.987891233536326,3.9227777777777777 +34,19.85831833,1,2,1,1,42195,3,17700,50,0,2.3838983050847458,4.916666666666667 +34,22.4551754,0,2,3,1,42195,3,9696,20,1,4.351794554455446,2.6933333333333334 +47,25.92195129,0,1,2,2,42195,2,11968,50,1,3.5256517379679146,3.3244444444444445 +34,24.0420742,1,2,1,3,42195,3,17760,51,0,2.3758445945945947,4.933333333333334 +42,19.47665596,1,2,3,1,42195,3,13800,50,0,3.0576086956521737,3.8333333333333335 +42,27.4961338,0,2,1,1,42195,2,16020,30,0,2.6338951310861423,4.45 +31,22.47685242,1,2,3,2,42195,3,13610,42,0,3.100293901542983,3.780555555555556 +25,23.37423897,1,2,1,1,42195,3,12900,24,1,3.2709302325581397,3.5833333333333335 +30,23.88304138,0,2,1,1,42195,3,11700,32,0,3.6064102564102565,3.25 +32,17.75568008,1,1,2,3,42195,3,10923,60,1,3.8629497390826697,3.0341666666666667 +28,22.0347538,0,2,1,1,42195,4,17185,32,1,2.455338958393948,4.773611111111111 +27,21.20791435,0,2,2,2,42195,2,13747,45,0,3.0693969593365824,3.818611111111111 +45,23.64034653,0,1,1,1,42195,4,10595,110,1,3.9825389334591788,2.9430555555555555 +46,24.96845627,0,1,3,1,42195,4,14199,45,1,2.9716881470526095,3.944166666666667 
+28,25.60000038,0,2,2,1,42195,3,15324,45,0,2.7535238841033673,4.256666666666667 +43,19.6209259,0,1,2,1,42195,3,11642,90,1,3.624377254767222,3.233888888888889 +51,21.74523163,0,2,1,2,42195,4,10782,80,1,3.9134668892598774,2.9949999999999997 +45,23.80480194,0,2,2,1,42195,4,12662,60,1,3.33241194124151,3.5172222222222222 +51,23.45083618,0,2,3,1,42195,3,12504,56,1,3.3745201535508635,3.4733333333333336 +38,28.2003727,0,2,1,2,42195,4,11880,45,0,3.551767676767677,3.3 +34,23.00556564,0,2,3,1,42195,4,10750,75,0,3.9251162790697673,2.986111111111111 +28,23.98990059,0,2,3,1,42195,4,11130,53,1,3.7911051212938007,3.091666666666667 +45,19.51649475,0,2,2,1,42195,2,13423,45,0,3.14348506295165,3.7286111111111113 +43,19.88636398,1,2,2,1,42195,3,12909,50,1,3.2686497792237974,3.5858333333333334 +34,24.44100571,0,2,3,3,42195,3,10710,65,1,3.939775910364146,2.975 +30,21.5213356,1,2,3,1,42195,4,19618,30,1,2.150830869609542,5.449444444444444 +45,20.33395195,0,2,2,3,42195,2,9930,82,1,4.2492447129909365,2.7583333333333333 +33,22.08595085,1,1,3,3,42195,3,12867,40,0,3.2793191886220563,3.5741666666666663 +47,21.83592224,0,2,2,1,42195,4,10747,86,1,3.926211966130083,2.9852777777777777 +43,23.14814949,0,2,1,1,42195,3,10200,45,0,4.136764705882353,2.8333333333333335 +38,19.04384041,0,2,1,1,42195,4,13069,75,1,3.228632642130232,3.6302777777777777 +52,18.46590805,1,2,2,1,42195,2,15120,45,1,2.7906746031746033,4.2 +41,31.45643044,0,2,3,2,42195,2,17623,42,0,2.3943142484253532,4.895277777777777 +31,23.64955711,1,2,1,1,42195,4,13230,50,1,3.189342403628118,3.675 +33,25.96857071,0,2,3,2,42195,3,12540,40,0,3.3648325358851676,3.4833333333333334 +70,20.48413086,1,2,2,2,42195,3,16691,48,1,2.5280091067042116,4.636388888888889 +26,25.04382706,0,2,2,1,42195,4,12000,40,0,3.51625,3.3333333333333335 +70,26.42340279,0,1,3,2,42195,3,17340,35,1,2.4333910034602075,4.816666666666666 +47,22.16610718,0,2,2,1,42195,3,12265,55,1,3.44027721157766,3.406944444444444 
+47,18.32382584,1,2,1,2,42195,4,12184,80,0,3.4631483913328958,3.3844444444444446 +35,21.94683647,1,2,1,3,42195,2,17870,30,0,2.3612199216564074,4.963888888888889 +28,18.75529671,0,1,2,1,42195,4,9742,75,1,4.331246150687744,2.7061111111111114 +33,22.0466156,0,2,3,1,42195,3,11055,35,1,3.8168249660786975,3.0708333333333333 +35,27.54821014,1,2,2,1,42195,3,22186,24,1,1.9018750563418372,6.1627777777777775 +31,19.97297096,1,2,3,2,42195,4,14307,42,0,2.949255609142378,3.9741666666666666 +48,20.41903305,1,2,3,3,42195,3,11777,56,1,3.582830941665959,3.2713888888888887 +43,18.28758621,0,1,3,2,42195,2,15243,49,0,2.76815587482779,4.234166666666667 +42,22.03365898,1,2,2,1,42195,2,12619,38,1,3.3437673349710755,3.5052777777777777 +56,24.48979568,0,2,1,1,42195,4,16440,35,1,2.5666058394160585,4.566666666666666 +60,20.92764473,0,2,3,1,42195,2,17880,40,0,2.3598993288590604,4.966666666666667 +60,27.55586243,0,1,2,1,42195,3,17520,33,0,2.408390410958904,4.866666666666666 +35,22.19460106,1,2,2,1,42195,3,13127,50,1,3.2143673345014094,3.6463888888888887 +40,23.84960938,0,1,2,1,42195,3,11100,55,1,3.8013513513513515,3.0833333333333335 +44,24.5955925,1,2,3,1,42195,2,14400,40,1,2.9302083333333333,4 +30,21.30681801,1,2,2,2,42195,3,14043,34,1,3.0046998504593034,3.9008333333333334 +31,24.55106735,0,2,2,3,42195,4,13321,50,0,3.167554988364237,3.700277777777778 +30,24.18745041,1,2,2,1,42195,2,11940,65,1,3.5339195979899496,3.316666666666667 +27,20.91937065,0,2,1,1,42195,3,9665,85,0,4.365752715985515,2.6847222222222222 +45,25.79951668,1,2,2,1,42195,3,17340,34,0,2.4333910034602075,4.816666666666666 +43,22.50635719,0,2,1,1,42195,4,12274,52,1,3.437754603226332,3.4094444444444445 +47,25.04382706,1,2,1,1,42195,3,15660,40,0,2.6944444444444446,4.35 +28,24.04452515,0,2,1,1,42195,3,13378,41,1,3.1540589026760353,3.716111111111111 +39,26.41929436,0,1,1,1,42195,4,13520,55,0,3.1209319526627217,3.7555555555555555 +45,23.5923233,0,2,1,1,42195,3,11040,55,0,3.8220108695652173,3.066666666666667 
+30,22.44668961,0,2,1,2,42195,3,13200,30,1,3.1965909090909093,3.6666666666666665 +43,25.97402573,0,2,1,3,42195,3,14220,36,0,2.967299578059072,3.95 +34,23.37423897,1,2,3,2,42195,2,12070,60,0,3.495857497928749,3.3527777777777774 +29,20.522686,1,2,2,1,42195,2,13326,31,0,3.1663665015758666,3.7016666666666667 +30,22.51829529,0,1,3,1,42195,3,13440,60,0,3.1395089285714284,3.7333333333333334 +27,20.15620995,1,2,3,1,42195,2,13328,42,0,3.165891356542617,3.7022222222222223 +35,25.27348328,0,2,2,1,42195,2,14354,35,1,2.939598718127351,3.987222222222222 +48,22.4551754,1,2,1,3,42195,2,18667,14,1,2.2604060641774253,5.185277777777778 +31,22.76068687,0,2,1,1,42195,3,12300,50,0,3.430487804878049,3.4166666666666665 +41,26.7320137,0,2,1,3,42195,2,13032,22,0,3.2377992633517496,3.6199999999999997 +28,21.49645615,0,2,1,2,42195,3,9363,71,1,4.506568407561679,2.6008333333333336 +53,23.37423897,0,2,3,1,42195,2,13321,47,0,3.167554988364237,3.700277777777778 +23,22.91344261,0,2,3,2,42195,2,17587,20,0,2.399215329504748,4.885277777777778 +33,22.35768318,0,1,2,2,42195,3,11467,78,0,3.6796895439086073,3.185277777777778 +44,20.86985588,1,2,1,1,42195,4,15458,50,0,2.7296545478069607,4.293888888888889 +32,24.48979568,0,2,3,1,42195,2,9787,83,0,4.311331357923777,2.718611111111111 +45,24.09897804,1,2,1,3,42195,2,17634,40,0,2.392820687308608,4.8983333333333325 +42,24.18745041,1,3,2,2,42195,2,17171,44,0,2.4573408654126143,4.769722222222223 +36,30.29488754,1,2,3,1,42195,3,16585,40,0,2.5441664154356345,4.606944444444445 +31,21.81818199,1,2,3,1,42195,4,14526,30,0,2.904791408508881,4.035 +34,21.74523163,0,2,2,1,42195,3,12180,50,0,3.4642857142857144,3.3833333333333333 +34,23.08344269,0,1,3,1,42195,2,12484,50,1,3.379926305671259,3.4677777777777776 +46,20.86985588,1,2,1,1,42195,2,15600,50,1,2.7048076923076922,4.333333333333333 +30,21.23309135,1,2,2,3,42195,3,15780,43,0,2.673954372623574,4.383333333333334 +37,23.27125931,1,2,2,2,42195,4,12826,50,1,3.289801964759083,3.562777777777778 
+35,21.70465088,1,2,1,1,42195,3,13484,50,0,3.12926431326016,3.7455555555555553 +41,22.47685242,1,2,2,2,42195,4,17280,40,0,2.4418402777777777,4.8 +34,23.67722511,0,2,2,1,42195,3,13028,55,0,3.2387933681301813,3.618888888888889 +33,20.321558,0,2,1,1,42195,4,13450,50,0,3.137174721189591,3.736111111111111 +35,22.60793114,0,1,1,1,42195,4,9335,72,1,4.520085698982324,2.593055555555556 +26,19.79558945,1,2,2,1,42195,2,13523,48,1,3.120239591806552,3.7563888888888886 +40,25.37774658,0,2,2,1,42195,4,11691,55,1,3.609186553759302,3.2475 +41,22.90478325,1,2,1,1,42195,2,14940,35,1,2.82429718875502,4.15 +29,24.52615929,0,2,3,2,42195,2,14761,45,1,2.8585461689587426,4.100277777777778 +34,20.58569527,0,2,3,1,42195,2,14280,45,0,2.9548319327731094,3.966666666666667 +46,22.176784519999998,0,3,1,1,42195,2,12702,37,0,3.3219178082191783,3.5283333333333333 +33,21.49645615,0,2,1,1,42195,2,10527,52,0,4.008264462809917,2.9241666666666664 +25,22.19460106,1,1,2,1,42195,3,12495,38,1,3.376950780312125,3.470833333333333 +34,22.53944397,0,2,1,1,42195,2,11225,43,1,3.75902004454343,3.118055555555556 +34,21.74523163,1,2,1,2,42195,3,14050,50,1,3.003202846975089,3.9027777777777777 +35,30.5534687,0,2,1,2,42195,3,14264,17,0,2.9581463825014023,3.962222222222222 +45,25.24751663,0,2,1,1,42195,2,12882,40,1,3.275500698649278,3.578333333333333 +32,22.19567108,0,1,2,1,42195,2,10764,102,1,3.920011148272018,2.99 +36,23.16774559,1,2,2,1,42195,2,17101,60,1,2.4673995672767672,4.750277777777778 +36,25.6369915,0,2,2,2,42195,4,11580,55,0,3.643782383419689,3.216666666666667 +32,23.30609322,0,1,2,2,42195,3,15310,48,1,2.756041802743305,4.252777777777777 +35,24.37873268,1,2,1,2,42195,2,21290,37,1,1.9819163926726162,5.913888888888889 +39,22.44668961,0,2,3,3,42195,3,14161,50,0,2.979662453216581,3.9336111111111114 +32,23.78989792,0,2,3,1,42195,2,11384,75,1,3.7065179198875615,3.162222222222222 +28,17.5306797,1,1,1,2,42195,3,12180,60,0,3.4642857142857144,3.3833333333333333 
+27,25.74573708,0,1,1,1,42195,4,11058,45,1,3.8157894736842106,3.0716666666666668 +24,20.77922058,0,2,3,2,42195,2,11951,50,1,3.530666889800017,3.3197222222222225 +24,20.77922058,0,2,3,1,42195,4,10448,55,1,4.038571975497703,2.902222222222222 +41,24.37873268,0,2,1,1,42195,4,14130,21,0,2.98619957537155,3.925 +28,20.9185257,0,1,1,2,42195,2,10717,63,1,3.9372025753475786,2.9769444444444444 +30,21.53769112,1,2,2,1,42195,4,12499,47,1,3.3758700696055683,3.4719444444444445 +45,23.49176788,0,2,1,2,42195,2,19200,40,1,2.19765625,5.333333333333333 +38,25.23191071,0,1,2,2,42195,4,11380,61,0,3.7078207381370825,3.161111111111111 +29,17.86459732,1,2,2,1,42195,2,14238,50,1,2.9635482511588704,3.955 +32,23.70815659,0,2,3,3,42195,3,10604,40,1,3.979158807996982,2.9455555555555555 +32,21.75764084,1,2,2,2,42195,3,12338,45,1,3.419922191603177,3.427222222222222 +34,18.36547279,1,2,3,3,42195,3,11040,70,1,3.8220108695652173,3.066666666666667 +28,21.59191895,0,2,3,1,42195,4,12180,57,1,3.4642857142857144,3.3833333333333333 +37,19.86567116,1,1,1,1,42195,3,11426,60,1,3.6928934010152283,3.173888888888889 +35,22.176784519999998,0,2,3,1,42195,3,10994,65,1,3.838002546843733,3.053888888888889 +33,25.82033348,0,2,3,1,42195,3,12581,18,0,3.3538669422144505,3.4947222222222223 +31,21.15354919,0,2,1,1,42195,3,10391,85,1,4.060725627947262,2.886388888888889 +43,25.95398712,0,2,1,1,42195,3,14820,65,0,2.847165991902834,4.116666666666666 +26,25.16318321,1,2,1,2,42195,3,11744,48,1,3.5928985013623977,3.262222222222222 +44,23.08344269,0,1,2,1,42195,2,12798,52,1,3.296999531176746,3.555 +24,17.87248611,0,2,1,1,42195,3,10577,75,1,3.989316441334972,2.9380555555555556 +36,26.61252975,0,1,2,1,42195,2,13560,20,0,3.1117256637168142,3.7666666666666666 +29,21.68908119,1,2,2,1,42195,2,14004,40,1,3.0130676949443016,3.89 +27,22.01950264,0,2,3,2,42195,3,12312,60,1,3.4271442495126707,3.42 +53,22.41249084,0,2,2,1,42195,3,13289,48,1,3.1751824817518246,3.6913888888888886 
+33,22.44668961,0,2,2,1,42195,4,12480,52,0,3.3810096153846154,3.466666666666667 +21,21.97979736,0,2,2,1,42195,3,11580,50,1,3.643782383419689,3.216666666666667 +29,23.59925842,0,1,2,1,42195,2,10782,68,1,3.9134668892598774,2.9949999999999997 +28,25.23191071,0,1,3,1,42195,2,12964,60,0,3.2547824745448937,3.601111111111111 +32,31.38913918,0,2,1,1,42195,2,14896,60,1,2.8326396348012888,4.137777777777778 +46,25.96857071,0,2,2,1,42195,3,11433,60,1,3.6906323799527683,3.1758333333333337 +32,22.0347538,0,2,1,1,42195,4,14460,35,0,2.91804979253112,4.016666666666667 +30,20.98412895,0,2,2,2,42195,3,13200,31,1,3.1965909090909093,3.6666666666666665 +41,23.67722511,0,2,1,1,42195,3,15160,50,1,2.783311345646438,4.211111111111111 +33,24.50284004,1,2,2,1,42195,3,14822,40,0,2.8467818108217515,4.117222222222222 +44,23.74768066,0,1,1,1,42195,4,11680,50,1,3.612585616438356,3.2444444444444445 +35,23.82097435,1,2,1,2,42195,3,18010,40,0,2.3428650749583566,5.002777777777778 +34,25.53605843,0,2,3,2,42195,3,12900,55,1,3.2709302325581397,3.5833333333333335 +31,23.80480194,1,2,3,1,42195,3,14329,62,1,2.9447274757484823,3.980277777777778 +45,23.87511635,0,2,3,2,42195,3,12007,55,1,3.5142000499708503,3.335277777777778 +28,22.93917274,0,2,1,1,42195,3,15960,40,0,2.643796992481203,4.433333333333334 +25,23.64960861,1,1,2,1,42195,2,12355,65,1,3.415216511533792,3.4319444444444445 +35,23.24191475,0,2,1,1,42195,4,13260,50,0,3.182126696832579,3.683333333333333 +29,24.1929512,0,2,2,3,42195,4,12065,55,1,3.4973062577704104,3.351388888888889 +28,23.37423897,1,2,3,2,42195,3,13719,40,0,3.0756614913623443,3.8108333333333335 +44,23.56902504,0,1,3,1,42195,3,13779,55,1,3.062268669714783,3.8275 +45,28.55020332,0,2,2,3,42195,3,19740,55,0,2.1375379939209727,5.483333333333333 +43,23.90596962,0,1,3,2,42195,2,13242,62,1,3.1864521975532396,3.6783333333333332 +30,46.90821457,1,2,3,1,42195,3,18856,26,0,2.2377492575307594,5.237777777777778 +31,21.30681801,1,2,1,1,42195,2,14401,40,1,2.9300048607735576,4.000277777777778 
+45,25.93618584,0,1,2,1,42195,3,12305,45,1,3.429093864282812,3.4180555555555556 +39,24.85795403,1,2,1,3,42195,3,13350,54,1,3.160674157303371,3.7083333333333335 +61,23.4375,0,2,2,1,42195,3,13811,65,0,3.0551734124972847,3.836388888888889 +39,23.74768066,0,2,2,1,42195,3,14400,50,1,2.9302083333333333,4 +30,21.86121559,1,2,3,2,42195,2,14040,38,0,3.0053418803418803,3.9 +38,19.6476059,0,2,1,1,42195,4,9888,65,1,4.267293689320389,2.746666666666667 +47,21.5194149,1,2,2,2,42195,3,17778,32,0,2.373439082011475,4.9383333333333335 +41,25.97402573,0,2,3,2,42195,3,14820,42,0,2.847165991902834,4.116666666666666 +39,27.1192646,1,2,2,1,42195,2,15420,40,1,2.7363813229571985,4.283333333333333 +27,25.89813232,0,1,3,2,42195,4,14278,45,0,2.955245832749685,3.966111111111111 +30,22.01950264,0,2,1,1,42195,3,12315,32,0,3.4263093788063337,3.4208333333333334 +36,23.82097435,1,2,3,1,42195,3,18120,40,0,2.3286423841059603,5.033333333333333 +57,23.98273849,0,2,2,1,42195,2,13067,68,0,3.229126807989592,3.629722222222222 +38,25.8121376,0,2,1,1,42195,4,14370,40,1,2.9363256784968685,3.9916666666666667 +26,26.37949944,1,2,1,1,42195,3,13874,50,0,3.041300273893614,3.8538888888888887 +41,27.45825768,0,2,3,1,42195,3,15570,30,0,2.710019267822736,4.325 +42,26.40168762,0,1,2,1,42195,3,12896,55,0,3.271944789081886,3.582222222222222 +42,19.9094696,0,1,2,1,42195,4,11311,55,1,3.730439395278932,3.141944444444445 +41,21.85183334,1,1,2,2,42195,3,15362,40,0,2.7467126676214035,4.267222222222222 +54,25.96887398,0,2,1,1,42195,3,15390,33,1,2.7417153996101367,4.275 +30,23.64960861,1,2,3,1,42195,2,15601,35,1,2.7046343183129284,4.333611111111111 +33,19.70556259,0,2,2,1,42195,4,9279,80,1,4.547365017782089,2.5775 +44,23.84960938,0,1,3,1,42195,2,11544,55,1,3.6551455301455302,3.2066666666666666 +39,20.88916206,1,1,1,1,42195,3,13110,55,1,3.2185354691075516,3.6416666666666666 +30,24.83164978,0,2,1,2,42195,4,12616,30,0,3.344562460367787,3.5044444444444447 +51,23.49176788,1,1,3,2,42195,2,13504,60,1,3.124629739336493,3.751111111111111 
+40,20.3689785,1,2,3,2,42195,3,16097,45,0,2.6212958936447786,4.47138888888889 +66,21.5213356,0,1,2,1,42195,3,13332,55,0,3.164941494149415,3.703333333333333 +50,24.02800179,1,2,1,1,42195,3,14362,33,1,2.937961286728868,3.9894444444444446 +43,21.15354919,0,1,2,1,42195,3,18066,35,0,2.3356027897708405,5.0183333333333335 +35,24.48979568,0,2,3,2,42195,3,10788,58,1,3.911290322580645,2.996666666666667 +42,23.83880615,1,2,2,1,42195,2,14091,30,1,2.994464551841601,3.9141666666666666 +45,20.91937065,0,2,3,2,42195,3,10080,70,1,4.186011904761905,2.8 +52,24.85795403,1,2,1,1,42195,3,16200,65,0,2.60462962962963,4.5 +33,22.91344261,0,2,2,1,42195,4,10769,65,1,3.918191104095088,2.991388888888889 +33,20.58569527,0,2,3,1,42195,2,10491,50,1,4.022018873319989,2.9141666666666666 +52,26.16460609,0,1,2,1,42195,3,14406,45,0,2.928987921699292,4.001666666666667 +32,22.51829529,0,2,2,1,42195,3,10968,82,1,3.8471006564551424,3.046666666666667 +34,22.53944397,1,2,2,1,42195,2,12537,62,0,3.3656377123713805,3.4825 +41,22.02581596,0,2,2,1,42195,2,11752,44,1,3.5904526889040165,3.2644444444444445 +34,23.88304138,0,1,2,1,42195,4,12525,57,1,3.3688622754491018,3.4791666666666665 +33,25.24751663,0,1,3,1,42195,3,12344,40,0,3.4182598833441347,3.428888888888889 +57,21.85050583,0,2,1,2,42195,2,14282,50,0,2.9544181487186667,3.967222222222222 +37,23.51625824,0,2,3,1,42195,3,15851,34,1,2.6619771623241437,4.4030555555555555 +54,20.44668007,1,2,2,3,42195,2,14708,42,1,2.868846886048409,4.085555555555556 +58,23.82998085,0,2,2,2,42195,4,12420,58,1,3.3973429951690823,3.45 +37,27.07126808,0,2,3,2,42195,3,13412,45,1,3.1460632269609303,3.7255555555555557 +56,30.86419868,0,1,2,2,42195,2,19980,40,0,2.1118618618618616,5.55 +37,18.83348274,0,1,2,1,42195,2,8290,140,1,5.089867310012063,2.3027777777777776 +31,25.87862206,1,2,2,1,42195,2,15591,50,1,2.706369059072542,4.3308333333333335 +42,28.05194664,0,2,3,2,42195,3,14480,35,0,2.914019337016575,4.022222222222222 
+29,20.95170403,1,2,3,3,42195,4,11972,50,1,3.5244737721349817,3.3255555555555554 +32,22.2054863,1,2,2,1,42195,3,13149,45,1,3.2089892767510837,3.6525000000000003 +31,29.0236969,0,2,2,3,42195,3,14598,20,1,2.890464447184546,4.055000000000001 +44,25.16514397,0,2,2,3,42195,4,13628,35,1,3.0961990020545933,3.7855555555555553 +34,19.04384041,0,1,3,1,42195,3,10377,100,0,4.0662041052327265,2.8825 +40,23.5923233,0,2,3,3,42195,3,11970,50,1,3.525062656641604,3.325 +25,22.03365898,1,2,1,1,42195,3,15301,53,0,2.757662897849814,4.250277777777778 +28,23.64960861,1,2,1,1,42195,4,16586,45,0,2.5440130230314724,4.607222222222222 +23,21.40518761,0,2,3,1,42195,4,11989,60,1,3.5194761865042956,3.330277777777778 +48,21.46464729,0,2,1,1,42195,3,12477,35,0,3.381822553498437,3.4658333333333333 +36,24.44100571,0,2,3,1,42195,4,13022,50,0,3.240285670403932,3.6172222222222223 +49,20.9185257,1,2,2,2,42195,4,14506,52,0,2.908796360126844,4.029444444444445 +26,27.19061852,0,2,2,2,42195,3,15360,35,0,2.7470703125,4.266666666666667 +32,24.0484848,0,2,2,1,42195,2,14127,45,0,2.9868337226587385,3.9241666666666664 +34,26.93602943,0,2,1,2,42195,2,9694,72,1,4.352692387043532,2.6927777777777777 +57,23.74768066,0,2,3,1,42195,2,17100,45,0,2.4675438596491226,4.75 +27,23.14814949,0,2,2,2,42195,4,11971,40,0,3.524768189791997,3.325277777777778 +33,24.8768692,0,1,3,1,42195,2,15338,35,0,2.7510105620028686,4.260555555555555 +44,24.32528305,1,2,2,1,42195,2,12543,60,1,3.364027744558718,3.484166666666667 +30,18.53243256,1,2,2,1,42195,2,14119,41,1,2.9885260995821232,3.9219444444444442 +31,23.74768066,1,2,3,2,42195,4,19343,32,0,2.181409295352324,5.373055555555555 +30,25.97402573,0,2,1,1,42195,3,11310,45,0,3.730769230769231,3.1416666666666666 +33,21.89049911,1,2,2,1,42195,2,12862,61,1,3.2805939978230447,3.572777777777778 +30,24.93506622,1,2,3,1,42195,2,16700,42,1,2.5266467065868263,4.638888888888888 +36,24.55106735,0,2,1,1,42195,2,11580,56,1,3.643782383419689,3.216666666666667 
+49,20.91937065,1,1,1,1,42195,3,17520,38,0,2.408390410958904,4.866666666666666 +42,24.85058212,0,2,3,2,42195,3,13630,60,1,3.095744680851064,3.786111111111111 +32,22.72727203,0,3,1,1,42195,2,15000,30,0,2.813,4.166666666666667 +36,25.54157829,1,2,2,2,42195,3,15338,60,0,2.7510105620028686,4.260555555555555 +27,20.92425156,1,2,3,2,42195,3,13170,50,0,3.203872437357631,3.658333333333333 +30,22.01950264,1,2,1,1,42195,3,12337,55,1,3.4201994001783254,3.4269444444444446 +42,27.4961338,0,2,3,2,42195,3,13195,42,1,3.197802197802198,3.6652777777777774 +62,24.56541252,0,2,1,1,42195,3,17607,42,0,2.396490032373488,4.890833333333333 +31,19.97483444,1,2,1,1,42195,2,11302,80,0,3.7334100159263848,3.1394444444444445 +46,21.89049911,1,2,2,1,42195,3,13812,35,1,3.054952215464813,3.8366666666666664 +45,22.36208534,0,2,3,2,42195,2,11970,55,1,3.525062656641604,3.325 +28,21.60493851,0,2,1,1,42195,2,14392,34,0,2.9318371317398553,3.997777777777778 +25,19.70114517,1,1,3,1,42195,3,17940,42,1,2.3520066889632107,4.983333333333333 +35,24.34137344,0,2,3,1,42195,3,12055,30,0,3.5002073828287017,3.348611111111111 +23,23.27775955,1,2,2,1,42195,3,15660,18,0,2.6944444444444446,4.35 +30,24.48979568,0,2,2,1,42195,2,16110,45,0,2.6191806331471135,4.475 +44,24.23761749,0,2,3,1,42195,2,12289,63,1,3.433558466921637,3.413611111111111 +34,21.24975014,0,2,3,1,42195,3,12602,32,0,3.3482780511029997,3.5005555555555556 +41,25.20478821,0,2,1,1,42195,2,13500,20,1,3.1255555555555556,3.75 diff --git a/materials/R/worksheet_regression2/worksheet_regression2.ipynb b/materials/R/worksheet_regression2/worksheet_regression2.ipynb new file mode 100644 index 0000000..3f67068 --- /dev/null +++ b/materials/R/worksheet_regression2/worksheet_regression2.ipynb @@ -0,0 +1,1902 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "44d76b7a3bb4dfb5634051dc99ede3ff", + "grade": false, + "grade_id": "cell-fe148db84368f758", + 
"locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "# Worksheet: Regression II: linear regression\n", + "\n", + "This worksheet covers the [Regression II: linear regression](https://datasciencebook.ca/regression2.html) chapter of the online textbook, which also lists the learning objectives for this worksheet. You should read the textbook chapter before attempting this worksheet. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "94aed1b06b55ea926d94a5370e98a804", + "grade": false, + "grade_id": "cell-9d267d6cbb575992", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "### Run this cell before continuing.\n", + "library(tidyverse)\n", + "library(repr)\n", + "library(tidymodels)\n", + "library(cowplot)\n", + "options(repr.matrix.max.rows = 6)\n", + "source('cleanup.R')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "9f9b1c5d7e6c54f6b21d4f3a94bd4b02", + "grade": false, + "grade_id": "cell-5cd24ea6314eb5a1", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### Warm-up Questions\n", + "\n", + "Here are some warm-up questions on the topic of multiple regression to get you thinking before we jump into data analysis. The course readings should help you answer these.\n", + "\n", + "**Question 1.0** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In multivariable K-NN regression with one outcome/target variable and two predictor variables, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. a straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.0`. Make sure you put quotation marks around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "b68e9781d7d6de0dc3b6020e0032c8a2", + "grade": false, + "grade_id": "cell-c83bc93df7f00340", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "66ab381450430f7d6bcc24aa69a3ff6f", + "grade": true, + "grade_id": "cell-69d13db813c674a2", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.0 is not character\"= setequal(digest(paste(toString(class(answer1.0)), \"c1dca\")), \"0062368865d313b1d9a0758736981e96\"))\n", + "stopifnot(\"length of answer1.0 is not correct\"= setequal(digest(paste(toString(length(answer1.0)), \"c1dca\")), \"0b9103cc5a6d5362898cda5f7da0cd1f\"))\n", + "stopifnot(\"value of answer1.0 is not correct\"= setequal(digest(paste(toString(tolower(answer1.0)), \"c1dca\")), \"cfeaebac99f01978e387bb024cdc6d11\"))\n", + "stopifnot(\"letters in string value of answer1.0 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.0), \"c1dca\")), \"6bfc86aa847ff4157dd3ba4de5012220\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ce8c7ccf1a34341294b2fa56d53a7361", + "grade": false, + "grade_id": "cell-17b50854bfe8bed1", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In simple linear regression with one outcome/target variable and one predictor variable, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. A straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.1`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d34dc1682e6322aefc58306e7d4c1c93", + "grade": false, + "grade_id": "cell-4ba2e045fef50db4", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "ea9060b1e9142004aab4d87847eaee56", + "grade": true, + "grade_id": "cell-f7542e36e61cd131", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.1 is not character\"= setequal(digest(paste(toString(class(answer1.1)), \"9f35e\")), \"0f1e7877308258f0f4d1265b701d5e1e\"))\n", + "stopifnot(\"length of answer1.1 is not correct\"= setequal(digest(paste(toString(length(answer1.1)), \"9f35e\")), \"96556f84d0cfd3c5304b670e691a10af\"))\n", + "stopifnot(\"value of answer1.1 is not correct\"= setequal(digest(paste(toString(tolower(answer1.1)), \"9f35e\")), \"0afb28f4f1c05ad02549badb853886e1\"))\n", + "stopifnot(\"letters in string value of answer1.1 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.1), \"9f35e\")), \"5f72ad18f4b91281489a09abd242d8f6\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "e23b8e9e1e97119c478f42967f22353b", + "grade": false, + "grade_id": "cell-4c7cb5e7fd4eb668", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 1.2** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "In multiple linear regression with one outcome/target variable and two predictor variables, the predictions take the form of what shape?\n", + "\n", + "A. a flat plane\n", + "\n", + "B. a wiggly/flexible plane\n", + "\n", + "C. A straight line\n", + "\n", + "D. a wiggly/flexible line\n", + "\n", + "E. a 4D hyperplane\n", + "\n", + "F. a 4D wiggly/flexible hyperplane\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer1.2`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1c3f3c09e5b729cf0c2c0661a5535c95", + "grade": false, + "grade_id": "cell-079e531ebcb88c60", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "99850cb54bb10e3d9ef41eb1cfdcd231", + "grade": true, + "grade_id": "cell-547b82e7a64b9aa1", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer1.2 is not character\"= setequal(digest(paste(toString(class(answer1.2)), \"962df\")), \"edcf1b7f6d8db50d0ef71fe87b9ff265\"))\n", + "stopifnot(\"length of answer1.2 is not correct\"= setequal(digest(paste(toString(length(answer1.2)), \"962df\")), \"7f043cb37eae566922f1d03b91a3eeff\"))\n", + "stopifnot(\"value of answer1.2 is not correct\"= setequal(digest(paste(toString(tolower(answer1.2)), \"962df\")), \"63b747cced3f34f794ea789c80a52f56\"))\n", + "stopifnot(\"letters in string value of answer1.2 are correct but case is not 
correct\"= setequal(digest(paste(toString(answer1.2), \"962df\")), \"8ead807c82b5f9abb9a8d8440d856001\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "90902cc596c86e241d127a643265bc37", + "grade": false, + "grade_id": "cell-6ae21507eed64700", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "### Understanding Simple Linear Regression\n", + "\n", + "Consider this small and simple dataset: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "f0f86068987ba71eba448bbf880edf7f", + "grade": false, + "grade_id": "cell-2850a0b99f14004c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "## run this code \n", + "simple_data <- tibble(X = c(1, 2, 3, 6, 7, 7),\n", + " Y = c(1, 1, 3, 5, 7, 6))\n", + "options(repr.plot.width = 5, repr.plot.height = 5)\n", + "base <- ggplot(simple_data, aes(x = X, y = Y)) +\n", + " geom_point(size = 2) +\n", + " scale_x_continuous(limits = c(0, 7.5), breaks = seq(0, 8), minor_breaks = seq(0, 8, 0.25)) +\n", + " scale_y_continuous(limits = c(0, 7.5), breaks = seq(0, 8), minor_breaks = seq(0, 8, 0.25)) +\n", + " theme(text = element_text(size = 20))\n", + "base " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d61fea0714dff7819361e1783a0ae096", + "grade": false, + "grade_id": "cell-158e53f25ab76890", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Now consider these three **potential** lines we could fit for the same dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "afece69f7ae5bd9698ddd517494f354a", + "grade": false, + "grade_id": "cell-72d1bbcacfd85b37", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.height = 3.5, repr.plot.width = 10)\n", + "line_a <- base +\n", + " ggtitle(\"Line A\") +\n", + " geom_abline(intercept = -0.897, slope = 0.9834, color = \"blue\") +\n", + " theme(text = element_text(size = 20))\n", + "line_b <- base +\n", + " ggtitle(\"Line B\") +\n", + " geom_abline(intercept = 0.1022, slope = 0.9804, color = \"purple\") +\n", + " theme(text = element_text(size = 20))\n", + "line_c <- base +\n", + " ggtitle(\"Line C\") +\n", + " geom_abline(intercept = -0.2347, slope = 0.9164, color = \"green\") +\n", + " theme(text = element_text(size = 20))\n", + "plot_grid(line_a, line_b, line_c, ncol = 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "76961602fb170179a7db9f5d5a5524d8", + "grade": false, + "grade_id": "cell-d7bb06c12cba1681", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.0**\n", + "
{points: 1}\n", + "\n", + "Use the graph below titled \"Line A\" to roughly calculate the average squared vertical distance between the points and the blue line. Read values of the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.0`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a063f46b4bc24b65cf63dc98fe1d847c", + "grade": false, + "grade_id": "cell-4d84f8c3727420a2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#run this code\n", + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7b96aa9e4ee450dba46cb8c4faf86ede", + "grade": false, + "grade_id": "cell-6cf53cb30ae3cd16", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "2eb0aea7e3990bcf2277e1dba4b5b51e", + "grade": true, + "grade_id": "cell-9d0a3c3a16e0f47b", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.0, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.0, 2))), \"c421d\")), \"60b51a2c6c08e835154fbd3a85eafb69\"))\n", + "stopifnot(\"value of round(answer2.0, 2) is not correct (rounded to 2 
decimal places)\"= setequal(digest(paste(toString(round(round(answer2.0, 2), 2)), \"c421d\")), \"b5753e2357b9e3c512b25411a68fdd61\"))\n", + "stopifnot(\"length of round(answer2.0, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.0, 2))), \"c421d\")), \"aae0eb53bb949b3c6130cd8e6a07f130\"))\n", + "stopifnot(\"values of round(answer2.0, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.0, 2), 2))), \"c421d\")), \"b5753e2357b9e3c512b25411a68fdd61\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "6b0ec9b6ebc65c04901b41672a826102", + "grade": false, + "grade_id": "cell-7ef45d3d7c403c81", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.1**\n", + "
{points: 1}\n", + "\n", + "Use the graph titled \"Line B\" to roughly calculate the average squared vertical distance between the points and the purple line. Read values of the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.1`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "444b814cf0b07058cd7a96d78d16878f", + "grade": false, + "grade_id": "cell-be8bd2be4d762d37", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "334b4c6cae6c3456dd810592a4d52ad0", + "grade": false, + "grade_id": "cell-be5564f4bd6cd576", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "82f34209479b394700790fc00386743b", + "grade": true, + "grade_id": "cell-840f1140c7655088", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.1, 2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.1, 2))), \"26b01\")), \"73fe4642b108c0c6fb9e31c5638f220d\"))\n", + "stopifnot(\"value of round(answer2.1, 2) is not correct (rounded to 2 decimal places)\"= 
setequal(digest(paste(toString(round(round(answer2.1, 2), 2)), \"26b01\")), \"7450db9b8c0f6a7562640316dfa0bc01\"))\n", + "stopifnot(\"length of round(answer2.1, 2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.1, 2))), \"26b01\")), \"371c4d802308bc29de8f5243443264fb\"))\n", + "stopifnot(\"values of round(answer2.1, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.1, 2), 2))), \"26b01\")), \"7450db9b8c0f6a7562640316dfa0bc01\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "0865e28d085d526f20585bc4291aa24b", + "grade": false, + "grade_id": "cell-69f334fbd3120d91", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.2** \n", + "
{points: 1}\n", + "\n", + "Use the graph titled \"Line C\" to roughly calculate the average squared vertical distance between the points and the green line. Read values of the graph to a **precision of 0.25** (e.g. 1, 1.25, 1.5, 1.75, 2). Save your answer to a variable named `answer2.2`. \n", + "\n", + "*We reprint the plot for you in a larger size to make it easier to estimate the locations on the graph.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "bf9b8c6e27fa701139200bfc1c3b4710", + "grade": false, + "grade_id": "cell-0634261679ff7469", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 9, repr.plot.height = 9)\n", + "line_c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "e551479b4f19ab9328aa8b172ab46c9e", + "grade": false, + "grade_id": "cell-3aed32faefe82978", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "answer2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5a32ee2a0224a1cbb04ba4d91910b608", + "grade": true, + "grade_id": "cell-3e544bd712b4d796", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of round(answer2.2) is not numeric\"= setequal(digest(paste(toString(class(round(answer2.2))), \"b6f4e\")), \"a9ff9b86408482a29adf68825849a2fc\"))\n", + "stopifnot(\"value of round(answer2.2) is not correct (rounded to 2 decimal places)\"= 
setequal(digest(paste(toString(round(round(answer2.2), 2)), \"b6f4e\")), \"10d782e505a4d946c033ae64a61f01d5\"))\n", + "stopifnot(\"length of round(answer2.2) is not correct\"= setequal(digest(paste(toString(length(round(answer2.2))), \"b6f4e\")), \"787c683899af26cd449f1d7825f44c94\"))\n", + "stopifnot(\"values of round(answer2.2) are not correct\"= setequal(digest(paste(toString(sort(round(round(answer2.2), 2))), \"b6f4e\")), \"10d782e505a4d946c033ae64a61f01d5\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "4fecdd64a1522109a436c961d7ee241f", + "grade": false, + "grade_id": "cell-ca359fbdc2020d22", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 2.3**\n", + "
{points: 1}\n", + "\n", + "Based on your calculations above, which line would linear regression by ordinary least squares choose given our small and simple dataset? Line A, B or C? Assign the letter that corresponds to the line to a variable named `answer2.3`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "3213e56ff5f24a8c6ddf10c6680d10f4", + "grade": false, + "grade_id": "cell-c3bc0fc1f61fb31b", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "81314417b4cbf6c1b22ac22128ee0b79", + "grade": true, + "grade_id": "cell-4ae1ac995c661109", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer2.3 is not character\"= setequal(digest(paste(toString(class(answer2.3)), \"6e40a\")), \"31581ca444e3e522b6799848cc7d57b4\"))\n", + "stopifnot(\"length of answer2.3 is not correct\"= setequal(digest(paste(toString(length(answer2.3)), \"6e40a\")), \"46ae7667ffe66fc339f6d8552bf49ec5\"))\n", + "stopifnot(\"value of answer2.3 is not correct\"= setequal(digest(paste(toString(tolower(answer2.3)), \"6e40a\")), \"c3b8ffae6f3f1f2abc0c0e8c62432cc8\"))\n", + "stopifnot(\"letters in string value of answer2.3 are correct but case is not correct\"= setequal(digest(paste(toString(answer2.3), \"6e40a\")), \"7dac3ef201af4beeadb8007c86d45e40\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": 
"markdown", + "checksum": "49453d76b4c2fd8fe5322b4875396ce0", + "grade": false, + "grade_id": "cell-c450e26cb57e9dc9", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "## Marathon Training Revisited with Linear Regression!\n", + "\n", + "\n", + "\n", + "Source: https://media.giphy.com/media/BDagLpxFIm3SM/giphy.gif\n", + "\n", + "Remember our question from last week: what features predict whether athletes will perform better than others? Specifically, we are interested in marathon runners, and looking at how the maximum distance ran per week during training predicts the time it takes a runner to end the race? \n", + "\n", + "This time around, however, we will analyze the data using simple linear regression rather than $k$-nn regression. In the end, we will compare our results to what we found last week with $k$-nn regression." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "b871fcac04015c642b4c42f5908d6912", + "grade": false, + "grade_id": "cell-8655bd26820bea69", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.0**\n", + "
{points: 1}\n", + "\n", + "Load the `marathon` data and assign it to an object called `marathon`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d1a30ea92494ef32d2b05174ef0d8938", + "grade": false, + "grade_id": "cell-7429888f4a5a274a", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8c2a3c9d3034fd0f6f44deb166a2f39f", + "grade": true, + "grade_id": "cell-36eda8d5f9545c0e", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon)), \"1acc1\")), \"ae70b371b8f45716725ad6ed47342dfc\"))\n", + "stopifnot(\"dimensions of marathon are not correct\"= setequal(digest(paste(toString(dim(marathon)), \"1acc1\")), \"b9802a9b3dddfe0ee07a7e2f6e7d0cdc\"))\n", + "stopifnot(\"column names of marathon are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon))), \"1acc1\")), \"a0d9dead321d7baca4daef454c2ba1e5\"))\n", + "stopifnot(\"types of columns in marathon are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon, class)))), \"1acc1\")), \"2b2cd994f5a194a03aa1042b91121b6e\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.numeric))) sort(round(sapply(marathon[, sapply(marathon, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"1acc1\")), \"d6e3e150abbd8e2ff830c3a43e8fddc0\"))\n", + "stopifnot(\"values in one or more character 
columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.character))) sum(sapply(marathon[sapply(marathon, is.character)], function(x) length(unique(x)))) else 0), \"1acc1\")), \"897000072cc9ce82681f2c1dd68b3a00\"))\n", + "stopifnot(\"values in one or more factor columns in marathon are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon, is.factor))) sum(sapply(marathon[, sapply(marathon, is.factor)], function(col) length(unique(col)))) else 0), \"1acc1\")), \"897000072cc9ce82681f2c1dd68b3a00\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1bdfd007331d2a152db48c3bbb51d04c", + "grade": false, + "grade_id": "cell-439c2ccda058add2", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.1**\n", + "
{points: 1}\n", + "\n", + "Similar to what we have done for the last few weeks, we will first split the dataset into the training and testing datasets, using 75% of the original data as the training data. Remember, we will be putting the test dataset away in a 'lock box' that we will come back to later after we choose our final model. In the `strata` argument of the `initial_split` function, place the variable we are trying to predict. Assign your split dataset to an object named `marathon_split`. \n", + "\n", + "Assign your training dataset to an object named `marathon_training` and your testing dataset to an object named `marathon_testing`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "867d9e669e311f27397efb3b1e4878bc", + "grade": false, + "grade_id": "cell-4862845e3f18eced", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "set.seed(2000) # DO NOT CHANGE THIS\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6bc3ab4c555bab5f742c15a576e39df4", + "grade": true, + "grade_id": "cell-b1eb46161667bcec", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of 'rsplit' %in% class(marathon_split) is not logical\"= setequal(digest(paste(toString(class('rsplit' %in% class(marathon_split))), \"71ac4\")), \"870c923565a346b32afd6edd74a1fcc4\"))\n", + "stopifnot(\"logical value of 'rsplit' %in% class(marathon_split) is not correct\"= setequal(digest(paste(toString('rsplit' %in% class(marathon_split)), \"71ac4\")), \"c128f7d08e1a97bd5d7130fe20bae29f\"))\n", + "\n", + "stopifnot(\"marathon_training should be 
a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_training)), \"71ac5\")), \"42de1b199d3ae516182745019109542b\"))\n", + "stopifnot(\"dimensions of marathon_training are not correct\"= setequal(digest(paste(toString(dim(marathon_training)), \"71ac5\")), \"dba79e5d0afe650395d1750b8bf39259\"))\n", + "stopifnot(\"column names of marathon_training are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_training))), \"71ac5\")), \"0cab7459f91e73487ade5163168e4af7\"))\n", + "stopifnot(\"types of columns in marathon_training are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_training, class)))), \"71ac5\")), \"42b6d84ea44fe5d15bebb1297ad9326a\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.numeric))) sort(round(sapply(marathon_training[, sapply(marathon_training, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"71ac5\")), \"fa950c5c01ce9c933b6f68f1817b2db9\"))\n", + "stopifnot(\"values in one or more character columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.character))) sum(sapply(marathon_training[sapply(marathon_training, is.character)], function(x) length(unique(x)))) else 0), \"71ac5\")), \"30421dcc571f50e1cf88d0ab99a762bb\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_training are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_training, is.factor))) sum(sapply(marathon_training[, sapply(marathon_training, is.factor)], function(col) length(unique(col)))) else 0), \"71ac5\")), \"30421dcc571f50e1cf88d0ab99a762bb\"))\n", + "\n", + "stopifnot(\"marathon_testing should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_testing)), \"71ac6\")), \"e04fd132545851d3ea14dd4f5aa6e235\"))\n", + "stopifnot(\"dimensions of marathon_testing are 
not correct\"= setequal(digest(paste(toString(dim(marathon_testing)), \"71ac6\")), \"c9610fb3584cf217219e358ef972e7e4\"))\n", + "stopifnot(\"column names of marathon_testing are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_testing))), \"71ac6\")), \"22c3445ff5c2cd059b263b6b79f27ab2\"))\n", + "stopifnot(\"types of columns in marathon_testing are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_testing, class)))), \"71ac6\")), \"e119200748401e4b4bec40953e8bc6df\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.numeric))) sort(round(sapply(marathon_testing[, sapply(marathon_testing, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"71ac6\")), \"3e0e18b649424c4c945ca0145bef2800\"))\n", + "stopifnot(\"values in one or more character columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.character))) sum(sapply(marathon_testing[sapply(marathon_testing, is.character)], function(x) length(unique(x)))) else 0), \"71ac6\")), \"6b860e416b6e8e441818ec6be5d4995d\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_testing are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_testing, is.factor))) sum(sapply(marathon_testing[, sapply(marathon_testing, is.factor)], function(col) length(unique(col)))) else 0), \"71ac6\")), \"6b860e416b6e8e441818ec6be5d4995d\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "ada7d473076efb49c499804ec2b64090", + "grade": false, + "grade_id": "cell-0ebfe08674f42eae", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.2**\n", + "
{points: 1}\n", + "\n", + "Using only the observations in the training dataset, create a scatterplot to assess the relationship between race time (`time_hrs`) and maximum distance ran per week during training (`max`). Put `time_hrs` on the y-axis and `max` on the x-axis. Assign this plot to an object called `marathon_eda`. Remember to do whatever is necessary to make this an effective visualization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "7f2e15b170d6d4bf45138d839d8b6a42", + "grade": false, + "grade_id": "cell-4a1c52e071e0b23e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.height = 8, repr.plot.width = 7)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "marathon_eda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9fe7eedbc69e1feea544ecaa584f0960", + "grade": true, + "grade_id": "cell-883edd273699e4b7", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(marathon_eda$layers)), function(i) {c(class(marathon_eda$layers[[i]]$geom))[1]})), \"11fac\")), \"4459ab0fafff46beffafd74322e39f33\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_eda$layers)), function(i) {rlang::get_expr(c(marathon_eda$layers[[i]]$mapping, marathon_eda$mapping)$x)}), as.character))), \"11fac\")), \"ff45305bbbd821a02854aa03e229b841\"))\n", + "stopifnot(\"variable y is not correct\"= 
setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(marathon_eda$layers)), function(i) {rlang::get_expr(c(marathon_eda$layers[[i]]$mapping, marathon_eda$mapping)$y)}), as.character))), \"11fac\")), \"63e3299ade73457551067f0c882c5a8c\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$x)!= marathon_eda$labels$x), \"11fac\")), \"0f8c484bf7bc498eb68af14f6135c7ce\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$y)!= marathon_eda$labels$y), \"11fac\")), \"0f8c484bf7bc498eb68af14f6135c7ce\"))\n", + "stopifnot(\"incorrect colour variable in marathon_eda, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"incorrect shape variable in marathon_eda, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$shape)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"the colour label in marathon_eda is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour) != marathon_eda$labels$colour), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"the shape label in marathon_eda is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(marathon_eda$layers[[1]]$mapping, marathon_eda$mapping)$colour) != marathon_eda$labels$shape), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"fill variable in marathon_eda 
is not correct\"= setequal(digest(paste(toString(quo_name(marathon_eda$mapping$fill)), \"11fac\")), \"4dd26f2adb5019221d751562ad1605ea\"))\n", + "stopifnot(\"fill label in marathon_eda is not informative\"= setequal(digest(paste(toString((quo_name(marathon_eda$mapping$fill) != marathon_eda$labels$fill)), \"11fac\")), \"f720d575ebde6c6d0020075d65c620ba\"))\n", + "stopifnot(\"position argument in marathon_eda is not correct\"= setequal(digest(paste(toString(class(marathon_eda$layers[[1]]$position)[1]), \"11fac\")), \"7bc0a64ae1fbaf25265604c8c06f747f\"))\n", + "\n", + "stopifnot(\"marathon_eda$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_eda$data)), \"11fad\")), \"f45fafe1072c34829ea6fb8e233b9781\"))\n", + "stopifnot(\"dimensions of marathon_eda$data are not correct\"= setequal(digest(paste(toString(dim(marathon_eda$data)), \"11fad\")), \"1300556bdc994b3feb7c50db1edef550\"))\n", + "stopifnot(\"column names of marathon_eda$data are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_eda$data))), \"11fad\")), \"5f03d7c878564320eb15d49ccdbc6adb\"))\n", + "stopifnot(\"types of columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_eda$data, class)))), \"11fad\")), \"55d9d03b6a1fdb2d57671eaf35751163\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.numeric))) sort(round(sapply(marathon_eda$data[, sapply(marathon_eda$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"11fad\")), \"1dead0c6529f779942736f60674097c6\"))\n", + "stopifnot(\"values in one or more character columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.character))) sum(sapply(marathon_eda$data[sapply(marathon_eda$data, is.character)], function(x) length(unique(x)))) else 0), \"11fad\")), 
\"d0c94533be178ca3d7d3f13fae051b9e\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_eda$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_eda$data, is.factor))) sum(sapply(marathon_eda$data[, sapply(marathon_eda$data, is.factor)], function(col) length(unique(col)))) else 0), \"11fad\")), \"d0c94533be178ca3d7d3f13fae051b9e\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d195b1a04cc840d00ca166ae12c124ca", + "grade": false, + "grade_id": "cell-c66b91eb433ebef7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.3**\n", + "
{points: 1}\n", + "\n", + "Now that we have our training data, the next step is to build a linear regression model specification. Thankfully, building other model specifications is quite straightforward since we will still go through the same procedure (indicate the function, the engine and the mode). \n", + "\n", + "Instead of using the `nearest_neighbor` function, we will be using the `linear_reg` function to let `tidymodels` know we want to perform a linear regression. In the `set_engine` function, we have typically set `\"kknn\"` there for $k$-nn. Since we are doing a linear regression here, set `\"lm\"` as the engine. Finally, instead of setting `\"classification\"` as the mode, set `\"regression\"` as the mode. \n", + "\n", + "Assign your answer to an object named `lm_spec`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "1530ffd4f16d72b585967be1e5cc154c", + "grade": false, + "grade_id": "cell-9736241c0c2966b9", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "31561987a5c3b62338c5cb3e308070ff", + "grade": true, + "grade_id": "cell-a647adab28a3dfb2", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_spec should be a model specification\"= setequal(digest(paste(toString('model_spec' %in% class(lm_spec)), \"d0889\")), \"a0dacbf88e81803337b8c7997b77466e\"))\n", + "stopifnot(\"model specification in lm_spec is not correct\"= setequal(digest(paste(toString(lm_spec$mode), \"d0889\")), \"875da79c33870ef8b6ae4c843ba107b1\"))\n", + 
"stopifnot(\"computational engine in lm_spec is not correct\"= setequal(digest(paste(toString(lm_spec$engine), \"d0889\")), \"574bc5e346e71d4f46b8c690ef333ed1\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "f5dab0b1a9c4ff92a05207dba7fe1c6f", + "grade": false, + "grade_id": "cell-29d06cfa3e0ab3bc", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.3.1**\n", + "
{points: 1}\n", + "\n", + "After we have created our linear regression model specification, the next step is to create a recipe, establish a workflow analysis and fit our simple linear regression model. \n", + "\n", + "First, create a recipe with the variables of interest (race time and max weekly training distance) using the training dataset and assign your answer to an object named `lm_recipe`. \n", + "\n", + "Then, create a workflow analysis with our model specification and recipe. Remember to fit the workflow on the training dataset as well. Assign your answer to an object named `lm_fit`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "303c4ba43630124883d8c60b0aa8039c", + "grade": false, + "grade_id": "cell-72f6968d56ed879f", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#... <- recipe(... ~ ..., data = ...)\n", + "\n", + "#... <- workflow() |>\n", + "# add_recipe(...) |>\n", + "# add_model(...) 
|>\n", + "# fit(...)\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_fit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9586012afa3a9bef9434b9116dcf2119", + "grade": true, + "grade_id": "cell-9d3b1ff7fdd3faa5", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_recipe should be a recipe\"= setequal(digest(paste(toString('recipe' %in% class(lm_recipe)), \"75481\")), \"9f9d6a2ddbcb5fa648535344804606f9\"))\n", + "stopifnot(\"response variable of lm_recipe is not correct\"= setequal(digest(paste(toString(sort(filter(lm_recipe$var_info, role == 'outcome')$variable)), \"75481\")), \"b8ecc843c3922304280df63250a47bf0\"))\n", + "stopifnot(\"predictor variable(s) of lm_recipe are not correct\"= setequal(digest(paste(toString(sort(filter(lm_recipe$var_info, role == 'predictor')$variable)), \"75481\")), \"adbdaeb8f5967844a99791b00b963cd0\"))\n", + "stopifnot(\"lm_recipe does not contain the correct data, might need to be standardized\"= setequal(digest(paste(toString(round(sum(bake(prep(lm_recipe), lm_recipe$template) %>% select_if(is.numeric), na.rm = TRUE), 2)), \"75481\")), \"1f4c1cba79358be26d7b63a420618b8b\"))\n", + "\n", + "stopifnot(\"lm_fit should be a workflow\"= setequal(digest(paste(toString('workflow' %in% class(lm_fit)), \"75482\")), \"cce7000d9ae3d342f9246a159eecb980\"))\n", + "stopifnot(\"computational engine used in lm_fit is not correct\"= setequal(digest(paste(toString(lm_fit$fit$actions$model$spec$engine), \"75482\")), \"fe034b6280904548b52fc19abbe6edff\"))\n", + "stopifnot(\"model specification used in lm_fit is not correct\"= setequal(digest(paste(toString(lm_fit$fit$actions$model$spec$mode), \"75482\")), \"3cae09094e67e2f99a265c29a8d3918b\"))\n", + 
"stopifnot(\"lm_fit must be a trained workflow, make sure to call the fit() function\"= setequal(digest(paste(toString(lm_fit$trained), \"75482\")), \"cce7000d9ae3d342f9246a159eecb980\"))\n", + "stopifnot(\"predictor variable(s) of lm_fit are not correct\"= setequal(digest(paste(toString(sort(filter(lm_fit$pre$actions$recipe$recipe$var_info, role == 'predictor')$variable)), \"75482\")), \"ff0c2ccbae3e98ee9211ee70e3ad1e8a\"))\n", + "stopifnot(\"lm_fit does not contain the correct data\"= setequal(digest(paste(toString(sort(vapply(lm_fit$pre$mold$predictors[, sapply(lm_fit$pre$mold$predictors, is.numeric)], function(col) if(!is.null(col)) round(sum(col), 2) else NA_real_, numeric(1)), na.last = NA)), \"75482\")), \"be94b1f7d4616144f9d51afb14b8b17f\"))\n", + "stopifnot(\"did not fit lm_fit on the training dataset\"= setequal(digest(paste(toString(nrow(lm_fit$pre$mold$outcomes)), \"75482\")), \"ee77ee2ba1694b3dc410cf44a5e0efce\"))\n", + "stopifnot(\"for classification/regression models, weight function is not correct\"= setequal(digest(paste(toString(quo_name(lm_fit$fit$actions$model$spec$args$weight_func)), \"75482\")), \"a2a2f068d8e40043fd0b88d09299bd2e\"))\n", + "stopifnot(\"for classification/regression models, response variable of lm_fit is not correct\"= setequal(digest(paste(toString(sort(filter(lm_fit$pre$actions$recipe$recipe$var_info, role == 'outcome')$variable)), \"75482\")), \"5d2ee0077f65d653f1f8d63a42599054\"))\n", + "stopifnot(\"for KNN models, number of neighbours is not correct\"= setequal(digest(paste(toString(quo_name(lm_fit$fit$actions$model$spec$args$neighbors)), \"75482\")), \"a2a2f068d8e40043fd0b88d09299bd2e\"))\n", + "stopifnot(\"for clustering models, the clustering is not correct\"= setequal(digest(paste(toString(lm_fit$fit$fit$fit$cluster), \"75482\")), \"c2b79e4fa40c9504e66f260d9a9b56c6\"))\n", + "stopifnot(\"for clustering models, the total within-cluster sum-of-squared distances is not correct\"= setequal(digest(paste(toString(if 
(!is.null(lm_fit$fit$fit$fit$tot.withinss)) round(lm_fit$fit$fit$fit$tot.withinss, 2) else NULL), \"75482\")), \"c2b79e4fa40c9504e66f260d9a9b56c6\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1de9e767bea811b0f51ac4725b4b4b84", + "grade": false, + "grade_id": "cell-dad798df6163dff4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.4**\n", + "
{points: 1}\n", + "\n", + "Now, let's visualize the model predictions as a straight line overlaid on the training data. Use the `predict` and `bind_cols` functions on `lm_fit` to create predictions for the `marathon_training` data. Name the resulting data frame `marathon_preds`.\n", + "\n", + "Next, create a scatterplot with the marathon time (y-axis) against the maximum distance run per week (x-axis) from `marathon_preds`. Use an alpha value of 0.4 to avoid overplotting. **Plot the predictions as a blue line over the data points.** Assign your plot to a variable called `lm_predictions`. Remember the fundamentals of effective visualizations such as having human-readable axis titles. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "4cbea9991bcdf02b0340eed13dd5be0a", + "grade": false, + "grade_id": "cell-6131349a47c37876", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "# marathon_preds <- ... |>\n", + "# predict(...) |>\n", + "# bind_cols(...)\n", + "#\n", + "# lm_predictions <- marathon_preds |>\n", + "# ...(aes(x = ..., y = ...)) +\n", + "# geom_point(... 
= 0.4) +\n", + "# geom_line(\n", + "# mapping = aes(x = ..., y = ...), \n", + "# color = \"blue\") +\n", + "# xlab(\"...\") +\n", + "# ylab(\"...\") +\n", + "# theme(text = ...(size = 20))\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c2acd42915d79e6c2235871620ec0fd8", + "grade": true, + "grade_id": "cell-4664079ebe7d0892", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"marathon_preds should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(marathon_preds)), \"92a43\")), \"9da11dcd29eb47f90ac1aefabe6c9455\"))\n", + "stopifnot(\"dimensions of marathon_preds are not correct\"= setequal(digest(paste(toString(dim(marathon_preds)), \"92a43\")), \"74088c002f1026d9e09caf9aa1594fa8\"))\n", + "stopifnot(\"column names of marathon_preds are not correct\"= setequal(digest(paste(toString(sort(colnames(marathon_preds))), \"92a43\")), \"3d11ae5f1d1aa863b880dfa9c0306f8d\"))\n", + "stopifnot(\"types of columns in marathon_preds are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(marathon_preds, class)))), \"92a43\")), \"dc8fa49c861bb94fc17b26fcd8616273\"))\n", + "stopifnot(\"values in one or more numerical columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.numeric))) sort(round(sapply(marathon_preds[, sapply(marathon_preds, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"92a43\")), \"2e034bbbff8419bdc580d52b81bff2ba\"))\n", + "stopifnot(\"values in one or more character columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.character))) 
sum(sapply(marathon_preds[sapply(marathon_preds, is.character)], function(x) length(unique(x)))) else 0), \"92a43\")), \"af2001c6c829297968ba45ced5235ec8\"))\n", + "stopifnot(\"values in one or more factor columns in marathon_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(marathon_preds, is.factor))) sum(sapply(marathon_preds[, sapply(marathon_preds, is.factor)], function(col) length(unique(col)))) else 0), \"92a43\")), \"af2001c6c829297968ba45ced5235ec8\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(lm_predictions$layers)), function(i) {c(class(lm_predictions$layers[[i]]$geom))[1]})), \"92a44\")), \"fb744d3c7ec20538e4f5cd822395cc85\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions$layers)), function(i) {rlang::get_expr(c(lm_predictions$layers[[i]]$mapping, lm_predictions$mapping)$x)}), as.character))), \"92a44\")), \"74c2844a5f83b6daa40aaec67fa2e92a\"))\n", + "stopifnot(\"variable y is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions$layers)), function(i) {rlang::get_expr(c(lm_predictions$layers[[i]]$mapping, lm_predictions$mapping)$y)}), as.character))), \"92a44\")), \"1ddf82aafb1c88458884d9df5a673917\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$x)!= lm_predictions$labels$x), \"92a44\")), \"9d3822c574af2fed6d28ab707c284cce\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$y)!= lm_predictions$labels$y), \"92a44\")), 
\"9d3822c574af2fed6d28ab707c284cce\"))\n", + "stopifnot(\"incorrect colour variable in lm_predictions, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"incorrect shape variable in lm_predictions, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$shape)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"the colour label in lm_predictions is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour) != lm_predictions$labels$colour), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"the shape label in lm_predictions is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions$layers[[1]]$mapping, lm_predictions$mapping)$colour) != lm_predictions$labels$shape), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"fill variable in lm_predictions is not correct\"= setequal(digest(paste(toString(quo_name(lm_predictions$mapping$fill)), \"92a44\")), \"8fc821fdbb622da7ac6e6263a813b77f\"))\n", + "stopifnot(\"fill label in lm_predictions is not informative\"= setequal(digest(paste(toString((quo_name(lm_predictions$mapping$fill) != lm_predictions$labels$fill)), \"92a44\")), \"b2107b37b60426a99544ad984154f1a3\"))\n", + "stopifnot(\"position argument in lm_predictions is not correct\"= setequal(digest(paste(toString(class(lm_predictions$layers[[1]]$position)[1]), \"92a44\")), \"44da79b989930014234165d79853228d\"))\n", + "\n", + "stopifnot(\"lm_predictions$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_predictions$data)), \"92a45\")), 
\"5b824784ce6d25ac0d227cfbc08effb6\"))\n", + "stopifnot(\"dimensions of lm_predictions$data are not correct\"= setequal(digest(paste(toString(dim(lm_predictions$data)), \"92a45\")), \"85a2b44d298b323abf26964fcdec67cb\"))\n", + "stopifnot(\"column names of lm_predictions$data are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_predictions$data))), \"92a45\")), \"98268a0def9229d53e704e820f4b9fb7\"))\n", + "stopifnot(\"types of columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_predictions$data, class)))), \"92a45\")), \"6e358fb6c724c4683b8bc75631365183\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.numeric))) sort(round(sapply(lm_predictions$data[, sapply(lm_predictions$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"92a45\")), \"249f484e2bd4edf34b3344f3dbbccac3\"))\n", + "stopifnot(\"values in one or more character columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.character))) sum(sapply(lm_predictions$data[sapply(lm_predictions$data, is.character)], function(x) length(unique(x)))) else 0), \"92a45\")), \"17108f98507ffe522490eb8f6ccdd547\"))\n", + "stopifnot(\"values in one or more factor columns in lm_predictions$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions$data, is.factor))) sum(sapply(lm_predictions$data[, sapply(lm_predictions$data, is.factor)], function(col) length(unique(col)))) else 0), \"92a45\")), \"17108f98507ffe522490eb8f6ccdd547\"))\n", + "\n", + "stopifnot(\"type of as.character(lm_predictions$layers[[2]]$aes_params) is not character\"= setequal(digest(paste(toString(class(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"a53836556fa0edf8ff013e891f1f9bd9\"))\n", + "stopifnot(\"length of 
as.character(lm_predictions$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(length(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"1483d186549c4800922240ea9731d38e\"))\n", + "stopifnot(\"value of as.character(lm_predictions$layers[[2]]$aes_params) is not correct\"= setequal(digest(paste(toString(tolower(as.character(lm_predictions$layers[[2]]$aes_params))), \"92a46\")), \"96b2060698cde202d2dec4eacaf0ef4a\"))\n", + "stopifnot(\"letters in string value of as.character(lm_predictions$layers[[2]]$aes_params) are correct but case is not correct\"= setequal(digest(paste(toString(as.character(lm_predictions$layers[[2]]$aes_params)), \"92a46\")), \"96b2060698cde202d2dec4eacaf0ef4a\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "51ea2afad1f6ea01d1549e3d933ac38d", + "grade": false, + "grade_id": "cell-c4273b717c7a3cb7", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.5**\n", + "
{points: 1}\n", + "\n", + "Great! We can now see the line of best fit on the graph. Now let's calculate the $RMSPE$ using the **test data**. To get to this point, first, use `lm_fit` to make predictions on the test data. Remember to bind the appropriate columns for the test data. Afterwards, collect the metrics and store them in an object called `lm_test_results`.\n", + "\n", + "From `lm_test_results`, extract the $RMSPE$ and return a single numerical value. Assign your answer to an object named `lm_rmspe`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9d5f238c35ed6d87f0820b963ef8e70b", + "grade": false, + "grade_id": "cell-5f42a9ac9068cfdf", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "#... <- lm_fit |>\n", + "# predict(...) |>\n", + "# bind_cols(...) |>\n", + "# metrics(truth = ..., estimate = ...)\n", + "\n", + "#... <- lm_test_results |>\n", + "# filter(...) |>\n", + "# select(...) 
|>\n", + "# ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_rmspe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "6443934a920e993bfe92b39c5786aaa0", + "grade": true, + "grade_id": "cell-96a0627f99b93667", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"lm_test_results should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_test_results)), \"897a7\")), \"b810641e52df8b085ad482ea493a8e75\"))\n", + "stopifnot(\"dimensions of lm_test_results are not correct\"= setequal(digest(paste(toString(dim(lm_test_results)), \"897a7\")), \"9770727e09ce49505b380eeecf31decb\"))\n", + "stopifnot(\"column names of lm_test_results are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_test_results))), \"897a7\")), \"3b139e12b7dcae96919fad7daf30dc88\"))\n", + "stopifnot(\"types of columns in lm_test_results are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_test_results, class)))), \"897a7\")), \"3a394aca9254bf32b1659200c8bf5307\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.numeric))) sort(round(sapply(lm_test_results[, sapply(lm_test_results, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"897a7\")), \"88bd1c199e045c8b65b425f2deed668b\"))\n", + "stopifnot(\"values in one or more character columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.character))) sum(sapply(lm_test_results[sapply(lm_test_results, is.character)], function(x) length(unique(x)))) else 0), \"897a7\")), \"855933d0f5e22f016f6baddd4ffaac61\"))\n", + "stopifnot(\"values in one 
or more factor columns in lm_test_results are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_test_results, is.factor))) sum(sapply(lm_test_results[, sapply(lm_test_results, is.factor)], function(col) length(unique(col)))) else 0), \"897a7\")), \"1e2484c0e0b3dd4af86bcff5e3d1b54f\"))\n", + "\n", + "stopifnot(\"type of round(lm_rmspe, 2) is not numeric\"= setequal(digest(paste(toString(class(round(lm_rmspe, 2))), \"897a8\")), \"1aa278f1def9d912be586c16e682e781\"))\n", + "stopifnot(\"value of round(lm_rmspe, 2) is not correct (rounded to 2 decimal places)\"= setequal(digest(paste(toString(round(round(lm_rmspe, 2), 2)), \"897a8\")), \"971da5ed9e27704013c9e9add30f4846\"))\n", + "stopifnot(\"length of round(lm_rmspe, 2) is not correct\"= setequal(digest(paste(toString(length(round(lm_rmspe, 2))), \"897a8\")), \"451753c10c17df6c5e9b434ce9093dff\"))\n", + "stopifnot(\"values of round(lm_rmspe, 2) are not correct\"= setequal(digest(paste(toString(sort(round(round(lm_rmspe, 2), 2))), \"897a8\")), \"971da5ed9e27704013c9e9add30f4846\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "703a97273e060a23864acb4febf15f4d", + "grade": false, + "grade_id": "cell-248f1e3467b09c40", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.5.1**\n", + "
{points: 1}\n", + "\n", + "Now, let's visualize the model predictions as a straight line overlaid on the test data. First, create a scatterplot to assess the relationship between race time (`time_hrs`) and maximum distance run per week during training (`max`) on the **testing data.** Use an alpha value of 0.4 to avoid overplotting. Then add a line to the plot corresponding to the predictions from the fit linear regression model. Remember to do whatever is necessary to make this an effective visualization.\n", + "\n", + "*Assign the plot to an object called `lm_predictions_test`.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "d8e6bd06a9c30160ca9342bae87c9efa", + "grade": false, + "grade_id": "cell-498c80d6fa9369a7", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "options(repr.plot.width = 8, repr.plot.height = 7)\n", + "\n", + "# test_preds <- ...\n", + "\n", + "# lm_predictions_test <- ...\n", + "\n", + "# your code here\n", + "fail() # No Answer - remove if you provide an answer\n", + "lm_predictions_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "8ac7774e84a41ae09dd036dff5221503", + "grade": true, + "grade_id": "cell-218d130332eebf2a", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"test_preds should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(test_preds)), \"34e17\")), \"b9c2981a0e8d6cf89e98fa7736d5f467\"))\n", + "stopifnot(\"dimensions of test_preds are not correct\"= setequal(digest(paste(toString(dim(test_preds)), \"34e17\")), \"e69a8e0f204485549c3aa56a8d089150\"))\n", + "stopifnot(\"column names 
of test_preds are not correct\"= setequal(digest(paste(toString(sort(colnames(test_preds))), \"34e17\")), \"4dbd60436948e508ce4d7dc43da278fc\"))\n", + "stopifnot(\"types of columns in test_preds are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(test_preds, class)))), \"34e17\")), \"f4be2b89540633a23ce4264b495756f4\"))\n", + "stopifnot(\"values in one or more numerical columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.numeric))) sort(round(sapply(test_preds[, sapply(test_preds, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"34e17\")), \"4c5890ff8271bd200379f553c61394c1\"))\n", + "stopifnot(\"values in one or more character columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.character))) sum(sapply(test_preds[sapply(test_preds, is.character)], function(x) length(unique(x)))) else 0), \"34e17\")), \"44ecda4ed9d0f455b7d326734d48e06e\"))\n", + "stopifnot(\"values in one or more factor columns in test_preds are not correct\"= setequal(digest(paste(toString(if (any(sapply(test_preds, is.factor))) sum(sapply(test_preds[, sapply(test_preds, is.factor)], function(col) length(unique(col)))) else 0), \"34e17\")), \"44ecda4ed9d0f455b7d326734d48e06e\"))\n", + "\n", + "stopifnot(\"type of plot is not correct (if you are using two types of geoms, try flipping the order of the geom objects!)\"= setequal(digest(paste(toString(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {c(class(lm_predictions_test$layers[[i]]$geom))[1]})), \"34e18\")), \"e996b7462be5db50fddea027a41e5d41\"))\n", + "stopifnot(\"variable x is not correct\"= setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {rlang::get_expr(c(lm_predictions_test$layers[[i]]$mapping, lm_predictions_test$mapping)$x)}), as.character))), \"34e18\")), \"dc26821c948a88627636fbaff8ba49fa\"))\n", + "stopifnot(\"variable y is not correct\"= 
setequal(digest(paste(toString(unlist(lapply(sapply(seq_len(length(lm_predictions_test$layers)), function(i) {rlang::get_expr(c(lm_predictions_test$layers[[i]]$mapping, lm_predictions_test$mapping)$y)}), as.character))), \"34e18\")), \"f859c22b95960db245622f8bda7a7c48\"))\n", + "stopifnot(\"x-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$x)!= lm_predictions_test$labels$x), \"34e18\")), \"d32017ce78d71b2418a43f5c2b1db675\"))\n", + "stopifnot(\"y-axis label is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$y)!= lm_predictions_test$labels$y), \"34e18\")), \"d32017ce78d71b2418a43f5c2b1db675\"))\n", + "stopifnot(\"incorrect colour variable in lm_predictions_test, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$colour)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"incorrect shape variable in lm_predictions_test, specify a correct one if required\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$shape)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"the colour label in lm_predictions_test is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, lm_predictions_test$mapping)$colour) != lm_predictions_test$labels$colour), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"the shape label in lm_predictions_test is not descriptive, nicely formatted, or human readable\"= setequal(digest(paste(toString(rlang::get_expr(c(lm_predictions_test$layers[[1]]$mapping, 
lm_predictions_test$mapping)$colour) != lm_predictions_test$labels$shape), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"fill variable in lm_predictions_test is not correct\"= setequal(digest(paste(toString(quo_name(lm_predictions_test$mapping$fill)), \"34e18\")), \"a5333b2a9e34a3481fa9ab37923f2179\"))\n", + "stopifnot(\"fill label in lm_predictions_test is not informative\"= setequal(digest(paste(toString((quo_name(lm_predictions_test$mapping$fill) != lm_predictions_test$labels$fill)), \"34e18\")), \"5bb6ade5c7dda10413c785b77ae992f1\"))\n", + "stopifnot(\"position argument in lm_predictions_test is not correct\"= setequal(digest(paste(toString(class(lm_predictions_test$layers[[1]]$position)[1]), \"34e18\")), \"cb745c6bdfda658a5f56a95dbea00f45\"))\n", + "\n", + "stopifnot(\"lm_predictions_test$data should be a data frame\"= setequal(digest(paste(toString('data.frame' %in% class(lm_predictions_test$data)), \"34e19\")), \"d7dad6698210cfacabf62c3bae8b5644\"))\n", + "stopifnot(\"dimensions of lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(dim(lm_predictions_test$data)), \"34e19\")), \"fa31829e597d4b489d7829cfb90bf196\"))\n", + "stopifnot(\"column names of lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(sort(colnames(lm_predictions_test$data))), \"34e19\")), \"7e6df8b1b9cb0ea102b917bf0ba32e62\"))\n", + "stopifnot(\"types of columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(sort(unlist(sapply(lm_predictions_test$data, class)))), \"34e19\")), \"469aeceaad0a86845823d2423e05da29\"))\n", + "stopifnot(\"values in one or more numerical columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.numeric))) sort(round(sapply(lm_predictions_test$data[, sapply(lm_predictions_test$data, is.numeric)], sum, na.rm = TRUE), 2)) else 0), \"34e19\")), \"72dd04320632d407e2e53309710b3ac5\"))\n", + 
"stopifnot(\"values in one or more character columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.character))) sum(sapply(lm_predictions_test$data[sapply(lm_predictions_test$data, is.character)], function(x) length(unique(x)))) else 0), \"34e19\")), \"cb8aa06bd1d7a4150bc39ea915c0afcc\"))\n", + "stopifnot(\"values in one or more factor columns in lm_predictions_test$data are not correct\"= setequal(digest(paste(toString(if (any(sapply(lm_predictions_test$data, is.factor))) sum(sapply(lm_predictions_test$data[, sapply(lm_predictions_test$data, is.factor)], function(col) length(unique(col)))) else 0), \"34e19\")), \"cb8aa06bd1d7a4150bc39ea915c0afcc\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "d83a8c2e08816813c9fe49068b2f3d6a", + "grade": false, + "grade_id": "cell-3379505df4a3aef4", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.6**\n", + "
{points: 1}\n", + "\n", + "Compare the test RMSPE of $k$-nn regression (`0.606` from the last worksheet) to that of simple linear regression. Which is greater? \n", + "\n", + "A. $k$-nn regression has a greater RMSPE\n", + "\n", + "B. Simple linear regression has a greater RMSPE\n", + "\n", + "C. Neither, they are identical\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.6`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "5ac95738cb9b73954da0c7aa71f20cac", + "grade": false, + "grade_id": "cell-e633b4ce6799432e", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "fedddf8df734c0c9f8d2cf6652489f82", + "grade": true, + "grade_id": "cell-c180c6230f13243c", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.6 is not character\"= setequal(digest(paste(toString(class(answer3.6)), \"1e1fa\")), \"e087a386b27f00004a34aa18a4121dc3\"))\n", + "stopifnot(\"length of answer3.6 is not correct\"= setequal(digest(paste(toString(length(answer3.6)), \"1e1fa\")), \"ae2d406db16c00044acce6f4fbfc8ae2\"))\n", + "stopifnot(\"value of answer3.6 is not correct\"= setequal(digest(paste(toString(tolower(answer3.6)), \"1e1fa\")), \"68ccef2326206925966a99e426d93ac4\"))\n", + "stopifnot(\"letters in string value of answer3.6 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.6), \"1e1fa\")), \"772b8e2399aa39350c32da02b2ae606c\"))\n", + 
"\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "55a6c6c328176a349ee3ff64edb84325", + "grade": false, + "grade_id": "cell-34ba4508e97d7316", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.7** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Which model does a better job of predicting on the test dataset?\n", + "\n", + "A. $k$-nn regression \n", + "\n", + "B. Simple linear regression \n", + "\n", + "C. Neither, they are identical\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.7`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "9a51d640e578f07161efec55c23a645e", + "grade": false, + "grade_id": "cell-e090cdac97461555", + "locked": false, + "schema_version": 3, + "solution": true + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "78370634905ab1845eb2398607033420", + "grade": true, + "grade_id": "cell-9f902420da757d0a", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.7 is not character\"= setequal(digest(paste(toString(class(answer3.7)), \"624f2\")), \"3d482a2579ff60cd919540409cdc4bf3\"))\n", + "stopifnot(\"length of answer3.7 is not correct\"= setequal(digest(paste(toString(length(answer3.7)), \"624f2\")), \"a00854f1d6158b8ae184fde0094f66dc\"))\n", + "stopifnot(\"value of answer3.7 is not correct\"= setequal(digest(paste(toString(tolower(answer3.7)), \"624f2\")), \"3bd126b31a6045a72d864108a9877fb2\"))\n", + "stopifnot(\"letters in string value of answer3.7 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.7), \"624f2\")), \"b885703406c472a36caebd8883c84dde\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": 
false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "a648f83d076164079d8a2aa6ba3ad841", + "grade": false, + "grade_id": "cell-e0b42a79e704b681", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "Given that the linear regression model is a straight line, we can write our model as a mathematical equation. We can get the two numbers we need for this from the coefficients in the output below: the intercept, `(Intercept)`, and the slope, which is labelled with the name of the predictor variable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "a9bff9106a8ff0da7ee39b23504b5bcc", + "grade": false, + "grade_id": "cell-950e45b9b52f7a59", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# run this cell\n", + "lm_fit" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "markdown", + "checksum": "1201b3ad79a96bfb1010e4ebff2b3c8e", + "grade": false, + "grade_id": "cell-936d12f8333ecff6", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "source": [ + "**Question 3.8.1** Multiple Choice: \n", + "
{points: 1}\n", + "\n", + "Which of the following mathematical equations represents the model based on the numbers output in the cell above? \n", + "\n", + "A. $Predicted \\ race \\ time \\ (in \\ hours) = 4.88 - 0.02 * max \\ (in \\ miles)$\n", + "\n", + "B. $Predicted \\ race \\ time \\ (in \\ hours) = -0.02 + 4.88 * max \\ (in \\ miles)$\n", + "\n", + "C. $Predicted \\ max \\ (in \\ miles) = 4.88 - 0.02 * \\ race \\ time \\ (in \\ hours)$\n", + " \n", + "D. $Predicted \\ max \\ (in \\ miles) = -0.02 + 4.88 * \\ race \\ time \\ (in \\ hours)$\n", + "\n", + "Save the letter of the answer you think is correct to a variable named `answer3.8.1`. Make sure you put quotations around the letter and pay attention to case." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "c7048c87837b2ea657cdc7c5138cacc2", + "grade": false, + "grade_id": "cell-e2595c01d8d07897", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "# your code here\n", + "fail() # No Answer - remove if you provide an answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "babee6dcecbc245d813562388fdb3453", + "grade": true, + "grade_id": "cell-5884a2fd9625b2e8", + "locked": true, + "points": 1, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "library(digest)\n", + "stopifnot(\"type of answer3.8.1 is not character\"= setequal(digest(paste(toString(class(answer3.8.1)), \"f0d48\")), \"ea9605462745f55518552cc11ef8d4e2\"))\n", + "stopifnot(\"length of answer3.8.1 is not correct\"= setequal(digest(paste(toString(length(answer3.8.1)), \"f0d48\")), \"5f486c8d36f348ddcab47e29d7675259\"))\n", + "stopifnot(\"value of answer3.8.1 is not correct\"= 
setequal(digest(paste(toString(tolower(answer3.8.1)), \"f0d48\")), \"cd6167a585ec3f62fe3b556fcaa430ac\"))\n", + "stopifnot(\"letters in string value of answer3.8.1 are correct but case is not correct\"= setequal(digest(paste(toString(answer3.8.1), \"f0d48\")), \"80181f4235abd8a5a976dbeb9f24b4f7\"))\n", + "\n", + "print('Success!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "nbgrader": { + "cell_type": "code", + "checksum": "0a3c609e5aad862abd5071ad3500be3d", + "grade": false, + "grade_id": "cell-79f423b84cd2fa5c", + "locked": true, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "source('cleanup.R')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}