diff --git a/learntools/sql/ex3.py b/learntools/sql/ex3.py index f9b0f90a7..311652140 100644 --- a/learntools/sql/ex3.py +++ b/learntools/sql/ex3.py @@ -8,8 +8,8 @@ # (1) ProlificCommenters prolific_commenters_query = """ - SELECT author, COUNT(id) AS NumPosts - FROM `bigquery-public-data.hacker_news.comments` + SELECT `by` AS author, COUNT(id) AS NumPosts + FROM `bigquery-public-data.hacker_news.full` GROUP BY author HAVING COUNT(id) > 10000 """ @@ -19,7 +19,7 @@ # (2) NumDeletedPosts deleted_posts_query = """ SELECT COUNT(1) AS num_deleted_posts - FROM `bigquery-public-data.hacker_news.comments` + FROM `bigquery-public-data.hacker_news.full` WHERE deleted = True """ query_job = client.query(deleted_posts_query) @@ -46,8 +46,8 @@ def check(self, results): _solution = CS(\ """ prolific_commenters_query = \""" - SELECT author, COUNT(1) AS NumPosts - FROM `bigquery-public-data.hacker_news.comments` + SELECT `by` AS author, COUNT(1) AS NumPosts + FROM `bigquery-public-data.hacker_news.full` GROUP BY author HAVING COUNT(1) > 10000 \""" @@ -62,7 +62,7 @@ class NumDeletedPosts(EqualityCheckProblem): # Query to determine how many posts were deleted deleted_posts_query = \""" SELECT COUNT(1) AS num_deleted_posts - FROM `bigquery-public-data.hacker_news.comments` + FROM `bigquery-public-data.hacker_news.full` WHERE deleted = True \""" diff --git a/notebooks/sql/raw/ex3.ipynb b/notebooks/sql/raw/ex3.ipynb index a41f7609d..4e0a33697 100644 --- a/notebooks/sql/raw/ex3.ipynb +++ b/notebooks/sql/raw/ex3.ipynb @@ -28,7 +28,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The code cell below fetches the `comments` table from the `hacker_news` dataset. We also preview the first five rows of the table." + "The code cell below fetches the `full` table from the `hacker_news` dataset. We also preview the first five rows of the table." 
] }, { @@ -48,13 +48,13 @@ "# API request - fetch the dataset\n", "dataset = client.get_dataset(dataset_ref)\n", "\n", - "# Construct a reference to the \"comments\" table\n", - "table_ref = dataset_ref.table(\"comments\")\n", + "# Construct a reference to the \"full\" table\n", + "table_ref = dataset_ref.table(\"full\")\n", "\n", "# API request - fetch the table\n", "table = client.get_table(table_ref)\n", "\n", - "# Preview the first five lines of the \"comments\" table\n", + "# Preview the first five lines of the table\n", "client.list_rows(table, max_results=5).to_dataframe()" ] }, @@ -72,7 +72,7 @@ "```\n", "query = \"\"\"\n", " SELECT parent, COUNT(1) AS NumPosts\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(1) > 10\n", " \"\"\"\n", @@ -86,7 +86,7 @@ "outputs": [], "source": [ "# Query to select prolific commenters and post counts\n", - "prolific_commenters_query = ____ # Your code goes here\n", + "prolific_commenters_query = \"\"\"____\"\"\" # Your code goes here\n", "\n", "# Set up the query (cancel the query if it would use too much of \n", "# your quota, with the limit set to 1 GB)\n", @@ -125,7 +125,7 @@ "source": [ "### 2) Deleted comments\n", "\n", - "How many comments have been deleted? (If a comment was deleted, the `deleted` column in the comments table will have the value `True`.)" + "How many comments have been deleted? (If a comment was deleted, the `deleted` column in the table will have the value `True`.)" ] }, { diff --git a/notebooks/sql/raw/tut3.ipynb b/notebooks/sql/raw/tut3.ipynb index a906d52e4..dc624491d 100644 --- a/notebooks/sql/raw/tut3.ipynb +++ b/notebooks/sql/raw/tut3.ipynb @@ -50,7 +50,7 @@ "\n", "Ready to see an example on a real dataset? The Hacker News dataset contains information on stories and comments from the Hacker News social networking site. 
\n", "\n", - "We'll work with the `comments` table and begin by printing the first few rows. (_We have hidden the corresponding code. To take a peek, click on the \"Code\" button below._)" + "We'll work with the `full` table and begin by printing the first few rows. (_We have hidden the corresponding code. To take a peek, click on the \"Code\" button below._)" ] }, { @@ -73,13 +73,13 @@ "# API request - fetch the dataset\n", "dataset = client.get_dataset(dataset_ref)\n", "\n", - "# Construct a reference to the \"comments\" table\n", - "table_ref = dataset_ref.table(\"comments\")\n", + "# Construct a reference to the \"full\" table\n", + "table_ref = dataset_ref.table(\"full\")\n", "\n", "# API request - fetch the table\n", "table = client.get_table(table_ref)\n", "\n", - "# Preview the first five lines of the \"comments\" table\n", + "# Preview the first five lines of the table\n", "client.list_rows(table, max_results=5).to_dataframe()" ] }, @@ -105,7 +105,7 @@ "# Query to select comments that received more than 10 replies\n", "query_popular = \"\"\"\n", " SELECT parent, COUNT(id)\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(id) > 10\n", " \"\"\"" @@ -160,7 +160,7 @@ "# Improved version of earlier query, now with aliasing & improved readability\n", "query_improved = \"\"\"\n", " SELECT parent, COUNT(1) AS NumPosts\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " HAVING COUNT(1) > 10\n", " \"\"\"\n", @@ -199,7 +199,7 @@ "source": [ "query_good = \"\"\"\n", " SELECT parent, COUNT(id)\n", - " FROM `bigquery-public-data.hacker_news.comments`\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " \"\"\"" ] @@ -222,8 +222,8 @@ "outputs": [], "source": [ "query_bad = \"\"\"\n", - " SELECT author, parent, COUNT(id)\n", - " FROM 
`bigquery-public-data.hacker_news.comments`\n", + " SELECT `by` AS author, parent, COUNT(id)\n", + " FROM `bigquery-public-data.hacker_news.full`\n", " GROUP BY parent\n", " \"\"\"" ] @@ -234,6 +234,8 @@ "source": [ "If make this error, you'll get the error message `SELECT list expression references column (column's name) which is neither grouped nor aggregated at`.\n", "\n", + "You may notice the `` `by` `` column in this query is surrounded by backticks. This is because **BY** is a reserved keyword used in clauses including **GROUP BY**. In BigQuery, reserved keywords used as identifiers must be quoted in backticks to avoid an error. We also make subsequent references to this column more readable by adding an alias to rename it to `author`.\n", + "\n", "# Your turn\n", "\n", "These aggregations let you write much more interesting queries. Try it yourself with **[these coding exercises](#$NEXT_NOTEBOOK_URL$)**."