fix: add detailed inline comments to improve code readability (#80) #84

Merged
merged 5 commits on Feb 2, 2025
5 changes: 2 additions & 3 deletions LICENSE
@@ -1,6 +1,6 @@
MIT License

-Copyright (c) 2025, group29
+Copyright (c) 2025, Lixuan Lin, Hui Tang, Sienko Ikhabi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,5 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
+SOFTWARE.
38 changes: 37 additions & 1 deletion README.md
@@ -18,6 +18,43 @@ $ pip install dsci524_group29_webscraping
- `parse_content(html, selector, selector_type)`: Parses the provided HTML content using CSS selectors or XPath to extract specified data.
- `save_data(data, format, destination)`: Saves the extracted data into the desired format (e.g., TXT, CSV, JSON) at the specified destination path.

## Usage

Below are examples demonstrating how to use the main functions in this package:

### 1. Fetch HTML Content
```python
from dsci524_group29_webscraping import fetch_html

# Fetch the raw HTML content from a webpage
url = "https://example.com"
html_content = fetch_html(url)
print(html_content) # Outputs the HTML content of the page
```

### 2. Parse Content
```python
from dsci524_group29_webscraping import parse_content

# Parse the HTML content to extract specific elements
selector = "h1" # Example: extract all <h1> elements
selector_type = "css" # Use CSS selectors
extracted_data = parse_content(html_content, selector, selector_type)
print(extracted_data) # Outputs a list of the extracted data
```

### 3. Save Data
```python
from dsci524_group29_webscraping import save_data

# Save the extracted data to a CSV file
data = [{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}] # Example data
file_path = save_data(data, format="csv", destination="output.csv")
print(f"Data saved to: {file_path}")
```

This package simplifies the process of fetching, parsing, and saving web data, making it ideal for beginners.

## Python Ecosystem

While libraries like [`BeautifulSoup`](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
@@ -35,7 +72,6 @@ it accessible for quick tasks and educational purposes.
*dsci524_group29_webscraping* differentiates itself by offering a simple set of functions
that handle straightforward, beginner-level scraping needs.


## Contributors

- Lixuan Lin
8 changes: 6 additions & 2 deletions pyproject.toml
@@ -2,7 +2,11 @@
name = "dsci524_group29_webscraping"
version = "1.1.3"
description = "A simple Python toolkit for web scraping"
-authors = ["group29"]
+authors = [
+    "Lixuan Lin",
+    "Hui Tang",
+    "Sienko Ikhabi"
+]
license = "MIT"
readme = "README.md"

@@ -30,4 +34,4 @@ build_command = "pip install poetry && poetry build" # build dists

[build-system]
requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+build-backend = "poetry.core.masonry.api"
19 changes: 14 additions & 5 deletions src/dsci524_group29_webscraping/save_data.py
@@ -44,31 +44,40 @@ def save_data(data, format='csv', destination='output.csv'):
     # Validate the destination directory
     dir_path = os.path.dirname(destination)
     if dir_path and not os.path.exists(dir_path):
+        # Ensure the directory exists before attempting to save
         raise FileNotFoundError(f"The directory {dir_path} does not exist.")
 
-    # Save as CSV
+    # Save data in CSV format
     if format == 'csv':
+        # Ensure the input data is a list of dictionaries
         if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
             raise ValueError("For CSV, data must be a list of dictionaries.")
         try:
             with open(destination, mode='w', newline='') as file:
+                # Write the data to the CSV file
                 writer = csv.DictWriter(file, fieldnames=data[0].keys())
-                writer.writeheader()
-                writer.writerows(data)
+                writer.writeheader()  # Write the header row
+                writer.writerows(data)  # Write the data rows
         except Exception as e:
+            # Handle unexpected issues when saving the CSV file
             raise Exception(f"Failed to save CSV data: {e}")
 
-    # Save as JSON
+    # Save data in JSON format
     elif format == 'json':
+        # Ensure the input data is either a list or a dictionary
         if not isinstance(data, (list, dict)):
             raise ValueError("For JSON, data must be a list or a dictionary.")
         try:
             with open(destination, mode='w') as file:
+                # Write the JSON data to the file with indentation for readability
                 json.dump(data, file, indent=4)
         except Exception as e:
+            # Handle unexpected issues when saving the JSON file
             raise Exception(f"Failed to save JSON data: {e}")
 
     else:
+        # Raise an error for unsupported formats
         raise ValueError("Unsupported format. Use 'csv' or 'json'.")
 
-    return os.path.abspath(destination)
+    # Return the absolute path to the saved file
+    return os.path.abspath(destination)
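The commented dispatch logic above can be tried stand-alone. The sketch below mirrors the same validate-then-branch structure using only the standard library; the function name `save_data_sketch` and the filename `demo_output.csv` are illustrative assumptions, and the broad `try/except` wrappers are omitted for brevity.

```python
import csv
import json
import os

def save_data_sketch(data, format='csv', destination='output.csv'):
    """Stand-alone mirror of save_data's CSV/JSON dispatch (hypothetical name)."""
    # Validate the destination directory before attempting any write
    dir_path = os.path.dirname(destination)
    if dir_path and not os.path.exists(dir_path):
        raise FileNotFoundError(f"The directory {dir_path} does not exist.")

    if format == 'csv':
        # CSV requires a list of dictionaries (one dict per row)
        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            raise ValueError("For CSV, data must be a list of dictionaries.")
        with open(destination, mode='w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=data[0].keys())
            writer.writeheader()   # header row from the first dict's keys
            writer.writerows(data) # one row per dictionary
    elif format == 'json':
        # JSON accepts either a list or a dictionary
        if not isinstance(data, (list, dict)):
            raise ValueError("For JSON, data must be a list or a dictionary.")
        with open(destination, mode='w') as file:
            json.dump(data, file, indent=4)
    else:
        raise ValueError("Unsupported format. Use 'csv' or 'json'.")

    return os.path.abspath(destination)

# Illustrative call; note csv round-trips all values as strings
path = save_data_sketch([{"name": "Alice", "age": 25}],
                        format='csv', destination='demo_output.csv')
```

One design point worth noting in the real function: the header row is derived from `data[0].keys()`, so every dictionary in the list is expected to share the first row's keys.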