Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Opengraph.io API to scrape websites and read/infer open graph tags. #120

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
FirecrawlCrawlWebsiteTool,
FirecrawlScrapeWebsiteTool,
FirecrawlSearchTool,
GetOpengraphTagsTool,
GithubSearchTool,
JSONSearchTool,
LlamaIndexTool,
MDXSearchTool,
MultiOnTool,
MySQLSearchTool,
NL2SQLTool,
OpenGraphScrapeWebsiteTool,
PDFSearchTool,
PGSearchTool,
RagTool,
Expand All @@ -40,6 +43,5 @@
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
MySQLSearchTool
)
from .tools.base_tool import BaseTool, Tool, tool
18 changes: 12 additions & 6 deletions crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,34 @@
from .file_read_tool.file_read_tool import FileReadTool
from .file_writer_tool.file_writer_tool import FileWriterTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
FirecrawlCrawlWebsiteTool
FirecrawlCrawlWebsiteTool,
)
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
FirecrawlScrapeWebsiteTool
FirecrawlScrapeWebsiteTool,
)
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .nl2sql.nl2sql_tool import NL2SQLTool
from .opengraphio_get_opengraph_tags_tool.opengraphio_get_opengraph_tags_tool import (
GetOpengraphTagsTool,
)
from .opengraphio_scrape_website_tool.opengraphio_scrape_website_tool import (
OpenGraphScrapeWebsiteTool,
)
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import (
ScrapeElementFromWebsiteTool
ScrapeElementFromWebsiteTool,
)
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
ScrapflyScrapeWebsiteTool
ScrapflyScrapeWebsiteTool,
)
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
Expand All @@ -46,7 +53,6 @@
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
120 changes: 120 additions & 0 deletions crewai_tools/tools/opengraphio_get_opengraph_tags_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# GetOpengraphTagsTool

## Description

The `GetOpengraphTagsTool` is a tool for retrieving OpenGraph tags from websites using the OpenGraph.io API.
It extracts key OpenGraph metadata, such as titles, descriptions, and images, from webpages, allowing users to gather
insights about any given URL. In addition to the tags found on the site, the OpenGraph.io API will infer values
that may be missing from the page.

## Installation

To use the `GetOpengraphTagsTool`, you need to install the `crewai[tools]` package:

```sh
pip install crewai[tools]
```

## Example

```python
# To run the example, you will need to make sure you have your API keys set.
# 1. create a free account on https://opengraph.io/
# 2. set the OPENGRAPHIO_API_KEY environment variable to your API key
# 3. run the example

from crewai_tools.tools.opengraphio_get_opengraph_tags_tool.opengraphio_get_opengraph_tags_tool import GetOpengraphTagsTool
from crewai import Agent, Task, Crew

# Create an instance of the OpenGraphTool
opengraph_tags_tool = GetOpengraphTagsTool()

# Create the agent with the OpenGraphTool
opengraph_specialist = Agent(
role="Open Graph Metadata Specialist",
goal="Suggest most relevant Open Graph metadata tags for a website",
backstory="A skilled SEO / SEM consultant with 20 years of experience.",
tools=[opengraph_tags_tool],
verbose=True,
cache=False
)

# Define the tasks for the agent
suggest_opengraph_tags = Task(
description="Review the OpenGraph metadata and the tags suggested from the Opengraph.io API for "
"https://www.wunderground.com/ and suggest the most relevant Open Graph metadata tags. "
"The Opengraph.io API will return the following important properties:"
"- hybridGraph - The tags that the Opengraph.io API suggests for the page"
"- openGraph - The tags that are currently on the page",
expected_output="Provide the tags that are currently on the page ('openGraph' property) and suggest HTML to be "
"inserted into the <HEAD> tag to provide more effective tags for sharing on social websites. "
"The response should look like this:"
"## Current Tags"
                    "Your assessment of the current tags"
"## Suggested Tags"
                    "Your suggested HTML content to add to the <HEAD> tag"
"### Explanation"
"Explain why you suggest these tags",
agent=opengraph_specialist
)


# Create a crew with the agent and tasks
crew = Crew(
agents=[opengraph_specialist],
tasks=[
suggest_opengraph_tags
],
verbose=True
)

# Kick off the crew to execute the tasks
crew.kickoff()

```
### Output
```bash
# Agent: Open Graph Metadata Specialist
## Final Answer:
## Current Tags
The current Open Graph tags were not found on the page; however, there are inferred tags based on the content extracted:
- Title: Local Weather Forecast, News and Conditions | Weather Underground
- Description: Weather Underground provides local & long-range weather forecasts, weather reports, maps & tropical weather conditions for locations worldwide
- Type: site
- URL: https://www.wunderground.com/
- Site Name: Local Weather Forecast, News and Conditions
- Image: https://www.wunderground.com/static/i/misc/twc-white.svg

## Suggested Tags
To enhance social sharing, I suggest adding the following HTML content to the `<HEAD>` tag:
\`\`\`html
<meta property="og:title" content="Local Weather Forecast, News and Conditions | Weather Underground" />
<meta property="og:description" content="Weather Underground provides local & long-range weather forecasts, weather reports, maps & tropical weather conditions for locations worldwide" />
<meta property="og:type" content="website" />
<meta property="og:url" content="https://www.wunderground.com/" />
<meta property="og:site_name" content="Weather Underground" />
<meta property="og:image" content="https://www.wunderground.com/static/i/misc/twc-white.svg" />
<meta property="og:image:alt" content="Weather Underground Logo" />
\`\`\`

### Explanation
I suggest these tags because they provide essential metadata for social platforms to display rich previews when the link to Weather Underground is shared. Including a specific image (`og:image`) enhances the visual appeal, while a clear and concise title and description (`og:title` and `og:description`) can help engage users and improve click-through rates. These elements ensure that the page is represented accurately and attractively on social media, which is crucial for driving traffic and improving user engagement.

```

## Arguments
- `url` (string): The webpage URL to scrape.
- `full_render` (bool, optional): Whether to fully render the page before extracting metadata.
- `max_cache_age` (int, optional): The maximum cache age in milliseconds.
- `use_proxy` (bool, optional): Whether to use a proxy for scraping.
- `use_premium` (bool, optional): Whether to use the Premium Proxy feature.
- `use_superior` (bool, optional): Whether to use the Superior Proxy feature.
- `auto_proxy` (bool, optional): Whether to automatically use a proxy for domains that require one.
- `cache_ok` (bool, optional): Whether to allow cached responses.
- `accept_lang` (string, optional): The request language sent when requesting the URL.
- `ignore_scrape_failures` (bool, optional): Whether to ignore failures.

## API Key
To use the OpenGraph.io API, you need to create a free account on [https://opengraph.io](https://opengraph.io) and set
the OPENGRAPHIO_API_KEY environment variable to your API key.

44 changes: 44 additions & 0 deletions crewai_tools/tools/opengraphio_get_opengraph_tags_tool/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Example: use GetOpengraphTagsTool with a CrewAI agent to audit a site's
OpenGraph tags and suggest better ones.

Prerequisites:
1. Create a free account on https://opengraph.io/
2. Set the OPENGRAPHIO_API_KEY environment variable to your API key.
3. Run this script (LLM credentials for the agent are also required).
"""

from crewai import Agent, Crew, Task

from crewai_tools.tools.opengraphio_get_opengraph_tags_tool.opengraphio_get_opengraph_tags_tool import (
    GetOpengraphTagsTool,
)

# Create an instance of the OpenGraph tags tool.
# The API key is read from the OPENGRAPHIO_API_KEY environment variable.
opengraph_tags_tool = GetOpengraphTagsTool()

# Create the agent with the OpenGraph tags tool.
# Caching is disabled so every run fetches fresh data from the API.
opengraph_specialist = Agent(
    role="Open Graph Metadata Specialist",
    goal="Suggest most relevant Open Graph metadata tags for a website",
    backstory="A skilled SEO / SEM consultant with 20 years of experience.",
    tools=[opengraph_tags_tool],
    verbose=True,
    cache=False,
)

# Define the task for the agent. The adjacent string literals are
# concatenated implicitly into a single prompt.
# Fix vs. original: "You're" corrected to "Your" in the expected_output prompt.
suggest_opengraph_tags = Task(
    description="Review the OpenGraph metadata and the tags suggested from the Opengraph.io API for "
    "https://www.wunderground.com/ and suggest the most relevant Open Graph metadata tags. "
    "The Opengraph.io API will return the following important properties:"
    "- hybridGraph - The tags that the Opengraph.io API suggests for the page"
    "- openGraph - The tags that are currently on the page",
    expected_output="Provide the tags that are currently on the page ('openGraph' property) and suggest HTML to be "
    "inserted into the <HEAD> tag to provide more effective tags for sharing on social websites. "
    "The response should look like this:"
    "## Current Tags"
    "Your assessment of the current tags"
    "## Suggested Tags"
    "Your suggested HTML content to add to the <HEAD> tag"
    "### Explanation"
    "Explain why you suggest these tags",
    agent=opengraph_specialist,
)


# Create a crew with the agent and task.
crew = Crew(agents=[opengraph_specialist], tasks=[suggest_opengraph_tags], verbose=True)

# Kick off the crew to execute the tasks.
crew.kickoff()
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging
import os
from typing import Optional, Type

from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool

logger = logging.getLogger(__file__)


class GetOpengraphTagsToolSchema(BaseModel):
    """Input schema for GetOpengraphTagsTool.

    Mirrors the query options of the OpenGraph.io site endpoint. Only ``url``
    is required; every other field defaults to ``None`` and is omitted from
    the outgoing request when unset (see ``GetOpengraphTagsTool._run``).
    """

    url: str = Field(description="Webpage URL")
    cache_ok: Optional[bool] = Field(
        default=None, description="Whether to allow cached responses"
    )
    full_render: Optional[bool] = Field(
        default=None,
        description="Whether to fully render the page before extracting metadata",
    )
    use_proxy: Optional[bool] = Field(
        default=None, description="Whether to use a proxy for scraping"
    )
    use_premium: Optional[bool] = Field(
        default=None, description="Whether to use the Premium Proxy feature"
    )
    use_superior: Optional[bool] = Field(
        default=None, description="Whether to use the Superior Proxy feature"
    )
    auto_proxy: Optional[bool] = Field(
        default=None,
        description="Whether to automatically use a proxy for domains that require one",
    )
    max_cache_age: Optional[int] = Field(
        default=None, description="The maximum cache age in milliseconds"
    )
    accept_lang: Optional[str] = Field(
        default=None, description="The request language sent when requesting the URL"
    )
    # Client-side option: not forwarded to the API; controls whether _run
    # swallows request errors (returns None) instead of raising.
    ignore_scrape_failures: Optional[bool] = Field(
        default=None, description="Whether to ignore failures"
    )


class GetOpengraphTagsTool(BaseTool):
    """Fetch OpenGraph tags for a URL via the OpenGraph.io API.

    Calls the OpenGraph.io ``site`` endpoint, which returns both the tags
    present on the page and tags the service infers for it. The API key is
    taken from the constructor argument or the ``OPENGRAPHIO_API_KEY``
    environment variable.
    """

    name: str = "OpenGraph.io tags extraction tool"
    description: str = "Extract OpenGraph tags from a webpage URL using OpenGraph.io"
    args_schema: Type[BaseModel] = GetOpengraphTagsToolSchema
    # Fixed annotation: the default is None, so the field must be Optional[str]
    # (plain `str = None` is an invalid model field declaration).
    api_key: Optional[str] = None

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the tool.

        Args:
            api_key: OpenGraph.io API key. Falls back to the
                OPENGRAPHIO_API_KEY environment variable when omitted.
        """
        super().__init__()
        self.api_key = api_key or os.getenv("OPENGRAPHIO_API_KEY")

    def _run(
        self,
        url: str,
        cache_ok: Optional[bool] = None,
        full_render: Optional[bool] = None,
        use_proxy: Optional[bool] = None,
        use_premium: Optional[bool] = None,
        use_superior: Optional[bool] = None,
        auto_proxy: Optional[bool] = None,
        max_cache_age: Optional[int] = None,
        accept_lang: Optional[str] = None,
        ignore_scrape_failures: Optional[bool] = None,
    ):
        """Request OpenGraph metadata for ``url`` from OpenGraph.io.

        Returns:
            The decoded JSON response (dict), or None when the request fails
            and ``ignore_scrape_failures`` is truthy.

        Raises:
            requests.RequestException: on request failure when
                ``ignore_scrape_failures`` is falsy.
        """
        # Lazy imports keep module import cheap (matches the tool's original style).
        import urllib.parse

        import requests

        # The target URL is a path segment of the API endpoint, so it must be
        # fully percent-encoded (including '/').
        encoded_url = urllib.parse.quote_plus(url)
        api_endpoint = f"https://opengraph.io/api/1.1/site/{encoded_url}"
        params = {"app_id": self.api_key}

        # Forward only the options the caller explicitly set; unset (None)
        # options are omitted so the API applies its own defaults.
        optional_params = {
            "cache_ok": cache_ok,
            "full_render": full_render,
            "use_proxy": use_proxy,
            "use_premium": use_premium,
            "use_superior": use_superior,
            "auto_proxy": auto_proxy,
            "max_cache_age": max_cache_age,
            "accept_lang": accept_lang,
        }
        params.update({k: v for k, v in optional_params.items() if v is not None})

        try:
            # Timeout prevents an agent run from hanging indefinitely on a
            # slow or unresponsive scrape (full_render jobs can be slow).
            response = requests.get(api_endpoint, params=params, timeout=60)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            if ignore_scrape_failures:
                logger.error(
                    f"Error fetching OpenGraph tags from {url}, exception: {e}"
                )
                return None
            # Bare re-raise preserves the original traceback.
            raise
Loading