Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL tutorial ex3 fixes #458

Merged
merged 3 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions learntools/sql/ex3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

# (1) ProlificCommenters
prolific_commenters_query = """
SELECT author, COUNT(id) AS NumPosts
FROM `bigquery-public-data.hacker_news.comments`
SELECT `by` AS author, COUNT(id) AS NumPosts
FROM `bigquery-public-data.hacker_news.full`
GROUP BY author
HAVING COUNT(id) > 10000
"""
Expand All @@ -19,7 +19,7 @@
# (2) NumDeletedPosts
deleted_posts_query = """
SELECT COUNT(1) AS num_deleted_posts
FROM `bigquery-public-data.hacker_news.comments`
FROM `bigquery-public-data.hacker_news.full`
WHERE deleted = True
"""
query_job = client.query(deleted_posts_query)
Expand All @@ -46,8 +46,8 @@ def check(self, results):
_solution = CS(\
"""
prolific_commenters_query = \"""
SELECT author, COUNT(1) AS NumPosts
FROM `bigquery-public-data.hacker_news.comments`
SELECT `by` AS author, COUNT(1) AS NumPosts
FROM `bigquery-public-data.hacker_news.full`
GROUP BY author
HAVING COUNT(1) > 10000
\"""
Expand All @@ -62,7 +62,7 @@ class NumDeletedPosts(EqualityCheckProblem):
# Query to determine how many posts were deleted
deleted_posts_query = \"""
SELECT COUNT(1) AS num_deleted_posts
FROM `bigquery-public-data.hacker_news.comments`
FROM `bigquery-public-data.hacker_news.full`
WHERE deleted = True
\"""

Expand Down
14 changes: 7 additions & 7 deletions notebooks/sql/raw/ex3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The code cell below fetches the `comments` table from the `hacker_news` dataset. We also preview the first five rows of the table."
"The code cell below fetches the `full` table from the `hacker_news` dataset. We also preview the first five rows of the table."
]
},
{
Expand All @@ -48,13 +48,13 @@
"# API request - fetch the dataset\n",
"dataset = client.get_dataset(dataset_ref)\n",
"\n",
"# Construct a reference to the \"comments\" table\n",
"table_ref = dataset_ref.table(\"comments\")\n",
"# Construct a reference to the \"full\" table\n",
"table_ref = dataset_ref.table(\"full\")\n",
"\n",
"# API request - fetch the table\n",
"table = client.get_table(table_ref)\n",
"\n",
"# Preview the first five lines of the \"comments\" table\n",
"# Preview the first five lines of the table\n",
"client.list_rows(table, max_results=5).to_dataframe()"
]
},
Expand All @@ -72,7 +72,7 @@
"```\n",
"query = \"\"\"\n",
" SELECT parent, COUNT(1) AS NumPosts\n",
" FROM `bigquery-public-data.hacker_news.comments`\n",
" FROM `bigquery-public-data.hacker_news.full`\n",
" GROUP BY parent\n",
" HAVING COUNT(1) > 10\n",
" \"\"\"\n",
Expand All @@ -86,7 +86,7 @@
"outputs": [],
"source": [
"# Query to select prolific commenters and post counts\n",
"prolific_commenters_query = ____ # Your code goes here\n",
"prolific_commenters_query = \"\"\"____\"\"\" # Your code goes here\n",
"\n",
"# Set up the query (cancel the query if it would use too much of \n",
"# your quota, with the limit set to 1 GB)\n",
Expand Down Expand Up @@ -125,7 +125,7 @@
"source": [
"### 2) Deleted comments\n",
"\n",
"How many comments have been deleted? (If a comment was deleted, the `deleted` column in the comments table will have the value `True`.)"
"How many comments have been deleted? (If a comment was deleted, the `deleted` column in the table will have the value `True`.)"
]
},
{
Expand Down
20 changes: 11 additions & 9 deletions notebooks/sql/raw/tut3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"\n",
"Ready to see an example on a real dataset? The Hacker News dataset contains information on stories and comments from the Hacker News social networking site. \n",
"\n",
"We'll work with the `comments` table and begin by printing the first few rows. (_We have hidden the corresponding code. To take a peek, click on the \"Code\" button below._)"
"We'll work with the `full` table and begin by printing the first few rows. (_We have hidden the corresponding code. To take a peek, click on the \"Code\" button below._)"
]
},
{
Expand All @@ -73,13 +73,13 @@
"# API request - fetch the dataset\n",
"dataset = client.get_dataset(dataset_ref)\n",
"\n",
"# Construct a reference to the \"comments\" table\n",
"table_ref = dataset_ref.table(\"comments\")\n",
"# Construct a reference to the \"full\" table\n",
"table_ref = dataset_ref.table(\"full\")\n",
"\n",
"# API request - fetch the table\n",
"table = client.get_table(table_ref)\n",
"\n",
"# Preview the first five lines of the \"comments\" table\n",
"# Preview the first five lines of the table\n",
"client.list_rows(table, max_results=5).to_dataframe()"
]
},
Expand All @@ -105,7 +105,7 @@
"# Query to select comments that received more than 10 replies\n",
"query_popular = \"\"\"\n",
" SELECT parent, COUNT(id)\n",
" FROM `bigquery-public-data.hacker_news.comments`\n",
" FROM `bigquery-public-data.hacker_news.full`\n",
" GROUP BY parent\n",
" HAVING COUNT(id) > 10\n",
" \"\"\""
Expand Down Expand Up @@ -160,7 +160,7 @@
"# Improved version of earlier query, now with aliasing & improved readability\n",
"query_improved = \"\"\"\n",
" SELECT parent, COUNT(1) AS NumPosts\n",
" FROM `bigquery-public-data.hacker_news.comments`\n",
" FROM `bigquery-public-data.hacker_news.full`\n",
" GROUP BY parent\n",
" HAVING COUNT(1) > 10\n",
" \"\"\"\n",
Expand Down Expand Up @@ -199,7 +199,7 @@
"source": [
"query_good = \"\"\"\n",
" SELECT parent, COUNT(id)\n",
" FROM `bigquery-public-data.hacker_news.comments`\n",
" FROM `bigquery-public-data.hacker_news.full`\n",
" GROUP BY parent\n",
" \"\"\""
]
Expand All @@ -222,8 +222,8 @@
"outputs": [],
"source": [
"query_bad = \"\"\"\n",
" SELECT author, parent, COUNT(id)\n",
" FROM `bigquery-public-data.hacker_news.comments`\n",
" SELECT `by` AS author, parent, COUNT(id)\n",
" FROM `bigquery-public-data.hacker_news.full`\n",
" GROUP BY parent\n",
" \"\"\""
]
Expand All @@ -234,6 +234,8 @@
"source": [
"If make this error, you'll get the error message `SELECT list expression references column (column's name) which is neither grouped nor aggregated at`.\n",
"\n",
"You may notice the `` `by` `` column in this query is surrounded by backticks. This is because **BY** is a reserved keyword used in clauses including **GROUP BY**. In BigQuery reserved keywords used as identifiers must be quoted in backticks to avoid an error. We also make subsequent references to this column more readable by adding an alias to rename it to `author`.\n",
"\n",
"# Your turn\n",
"\n",
"These aggregations let you write much more interesting queries. Try it yourself with **[these coding exercises](#$NEXT_NOTEBOOK_URL$)**."
Expand Down