From 31da54d7a91ba5781459cf5fce84a7a10dc0a16b Mon Sep 17 00:00:00 2001
From: Hui Tang <htang085@uottawa.ca>
Date: Sat, 1 Feb 2025 16:49:29 -0800
Subject: [PATCH 1/5] fix: add detailed inline comments to improve code
 readability (#80)

---
 src/dsci524_group29_webscraping/save_data.py | 22 +++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/dsci524_group29_webscraping/save_data.py b/src/dsci524_group29_webscraping/save_data.py
index 3b82749..1f49bb0 100644
--- a/src/dsci524_group29_webscraping/save_data.py
+++ b/src/dsci524_group29_webscraping/save_data.py
@@ -42,33 +42,45 @@ def save_data(data, format='csv', destination='output.csv'):
         - If the specified directory in the destination does not exist, a FileNotFoundError will be raised.
     """
     # Validate the destination directory
+    # Check if the directory in the destination path exists
     dir_path = os.path.dirname(destination)
     if dir_path and not os.path.exists(dir_path):
+        # Raise an error if the directory does not exist
         raise FileNotFoundError(f"The directory {dir_path} does not exist.")
 
-    # Save as CSV
+    # Save data in CSV format
     if format == 'csv':
+        # Ensure the input data is a list of dictionaries
         if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
             raise ValueError("For CSV, data must be a list of dictionaries.")
         try:
+            # Open the destination file in write mode
             with open(destination, mode='w', newline='') as file:
+                # Create a CSV writer object
                 writer = csv.DictWriter(file, fieldnames=data[0].keys())
-                writer.writeheader()
-                writer.writerows(data)
+                writer.writeheader()  # Write the header row
+                writer.writerows(data)  # Write the data rows
         except Exception as e:
+            # Raise an error if CSV saving fails
             raise Exception(f"Failed to save CSV data: {e}")
 
-    # Save as JSON
+    # Save data in JSON format
     elif format == 'json':
+        # Ensure the input data is either a list or a dictionary
         if not isinstance(data, (list, dict)):
             raise ValueError("For JSON, data must be a list or a dictionary.")
         try:
+            # Open the destination file in write mode
             with open(destination, mode='w') as file:
+                # Write the JSON data to the file with indentation for readability
                 json.dump(data, file, indent=4)
         except Exception as e:
+            # Raise an error if JSON saving fails
             raise Exception(f"Failed to save JSON data: {e}")
 
+    # Raise an error if the specified format is unsupported
     else:
         raise ValueError("Unsupported format. Use 'csv' or 'json'.")
 
-    return os.path.abspath(destination)
+    # Return the absolute path to the saved file
+    return os.path.abspath(destination)
\ No newline at end of file

From 36a36377b4b80cee6daf630f0f6acc51bab1f6f5 Mon Sep 17 00:00:00 2001
From: Hui Tang <htang085@uottawa.ca>
Date: Sat, 1 Feb 2025 16:57:21 -0800
Subject: [PATCH 2/5] fix: updated authors list in pyproject.toml with all team
 members (#75)

---
 pyproject.toml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fae4508..be49c52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,11 @@
 name = "dsci524_group29_webscraping"
 version = "1.1.3"
 description = "A simple Python toolkit for web scraping"
-authors = ["group29"]
+authors = [
+    "Lixuan Lin",
+    "Hui Tang",
+    "Sienko Ikhabi"
+]
 license = "MIT"
 readme = "README.md"
 
@@ -30,4 +34,4 @@ build_command = "pip install poetry && poetry build" # build dists
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+build-backend = "poetry.core.masonry.api"
\ No newline at end of file

From 48bc46852d8cd9d1dba39d71181f91d74a52701a Mon Sep 17 00:00:00 2001
From: Hui Tang <htang085@uottawa.ca>
Date: Sat, 1 Feb 2025 17:13:10 -0800
Subject: [PATCH 3/5] fix: added usage instructions to README.md (#69)

---
 README.md | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8f31f01..9737eaa 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,43 @@ $ pip install dsci524_group29_webscraping
 - `parse_content(html, selector, selector_type)`: Parses the provided HTML content using CSS selectors or XPath to extract specified data.
 - `save_data(data, format, destination)`: Saves the extracted data into the desired format (e.g., TXT, CSV, JSON) at the specified destination path.
 
+## Usage
+
+Below are examples demonstrating how to use the main functions in this package:
+
+### 1. Fetch HTML Content
+```python
+from dsci524_group29_webscraping import fetch_html
+
+# Fetch the raw HTML content from a webpage
+url = "https://example.com"
+html_content = fetch_html(url)
+print(html_content)  # Outputs the HTML content of the page
+```
+
+### 2. Parse Content
+```python
+from dsci524_group29_webscraping import parse_content
+
+# Parse the HTML content fetched in step 1 to extract specific elements
+selector = "h1"  # Example: extract all <h1> elements
+selector_type = "css"  # Use CSS selectors
+extracted_data = parse_content(html_content, selector, selector_type)
+print(extracted_data)  # Outputs a list of the extracted data
+```
+
+### 3. Save Data
+```python
+from dsci524_group29_webscraping import save_data
+
+# Save a list of dictionaries to a CSV file (CSV output requires this shape)
+data = [{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}]  # Example data
+file_path = save_data(data, format="csv", destination="output.csv")
+print(f"Data saved to: {file_path}")
+```
+
+This package simplifies the process of fetching, parsing, and saving web data, making it ideal for beginners.
+
 ## Python Ecosystem
 
 While libraries like [`BeautifulSoup`](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) 
@@ -35,7 +72,6 @@ it accessible for quick tasks and educational purposes.
 *dsci524_group29_webscraping* differentiates itself by offering a simple set of functions 
 that do the job for simple, beginner level needs.
 
-
 ## Contributors
 
 - Lixuan Lin

From d7f1515254f1fa6cabf7583487351d1acda25157 Mon Sep 17 00:00:00 2001
From: Hui Tang <htang085@uottawa.ca>
Date: Sat, 1 Feb 2025 17:16:05 -0800
Subject: [PATCH 4/5] fix: updated LICENSE to list all authors (#75)

---
 LICENSE | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/LICENSE b/LICENSE
index 45183cb..6254a4f 100755
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025, group29
+Copyright (c) 2025, Lixuan Lin, Hui Tang, Sienko Ikhabi
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,5 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
+SOFTWARE.
\ No newline at end of file

From 4f6e4b351a1d29c3a1625388caf2964afb2bd7ca Mon Sep 17 00:00:00 2001
From: Hui Tang <htang085@uottawa.ca>
Date: Sat, 1 Feb 2025 17:25:30 -0800
Subject: [PATCH 5/5] fix: refactored save_data.py to improve readability and
 comments (#81)

---
 src/dsci524_group29_webscraping/save_data.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/dsci524_group29_webscraping/save_data.py b/src/dsci524_group29_webscraping/save_data.py
index 1f49bb0..b4ee5f6 100644
--- a/src/dsci524_group29_webscraping/save_data.py
+++ b/src/dsci524_group29_webscraping/save_data.py
@@ -42,10 +42,9 @@ def save_data(data, format='csv', destination='output.csv'):
         - If the specified directory in the destination does not exist, a FileNotFoundError will be raised.
     """
     # Validate the destination directory
-    # Check if the directory in the destination path exists
     dir_path = os.path.dirname(destination)
     if dir_path and not os.path.exists(dir_path):
-        # Raise an error if the directory does not exist
+        # Fail early: the destination directory must already exist (it is not created here)
         raise FileNotFoundError(f"The directory {dir_path} does not exist.")
 
     # Save data in CSV format
@@ -54,14 +53,13 @@ def save_data(data, format='csv', destination='output.csv'):
         if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
             raise ValueError("For CSV, data must be a list of dictionaries.")
         try:
-            # Open the destination file in write mode
             with open(destination, mode='w', newline='') as file:
-                # Create a CSV writer object
+                # Write the data to the CSV file
                 writer = csv.DictWriter(file, fieldnames=data[0].keys())
                 writer.writeheader()  # Write the header row
                 writer.writerows(data)  # Write the data rows
         except Exception as e:
-            # Raise an error if CSV saving fails
+            # Handle unexpected issues when saving the CSV file
             raise Exception(f"Failed to save CSV data: {e}")
 
     # Save data in JSON format
@@ -70,16 +68,15 @@ def save_data(data, format='csv', destination='output.csv'):
         if not isinstance(data, (list, dict)):
             raise ValueError("For JSON, data must be a list or a dictionary.")
         try:
-            # Open the destination file in write mode
             with open(destination, mode='w') as file:
                 # Write the JSON data to the file with indentation for readability
                 json.dump(data, file, indent=4)
         except Exception as e:
-            # Raise an error if JSON saving fails
+            # Handle unexpected issues when saving the JSON file
             raise Exception(f"Failed to save JSON data: {e}")
 
-    # Raise an error if the specified format is unsupported
     else:
+        # Raise an error for unsupported formats
         raise ValueError("Unsupported format. Use 'csv' or 'json'.")
 
     # Return the absolute path to the saved file