sinaptik-ai · gventuri · Nov 7, 2023 · Oct 31, 2023 · Oct 31, 2023 · Nov 1, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,29 +1,25 @@
 repos:
-- repo: https://github.com/psf/black
-  rev: 23.3.0
-  hooks:
-  - id: black
-- repo: https://github.com/charliermarsh/ruff-pre-commit
-  rev: v0.0.220
-  hooks:
-  - id: ruff
-    name: ruff
-        # Respect `exclude` and `extend-exclude` settings.
-    args: [--force-exclude]
-- repo: local
-  hooks:
-  - id: pytest-check
-    name: pytest-check
-    entry: poetry run pytest
-    language: system
-    pass_filenames: false
-    always_run: true
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.1.3
+    hooks:
+      - id: ruff
+        name: ruff
+      - id: ruff-format
+        name: ruff-format
+  - repo: local
+    hooks:
+      - id: pytest-check
+        name: pytest-check
+        entry: poetry run pytest
+        language: system
+        pass_filenames: false
+        always_run: true
 
-- repo: https://github.com/sourcery-ai/sourcery
-  rev: v1.11.0
-  hooks:
-  - id: sourcery
-    # The best way to use Sourcery in a pre-commit hook:
-    # * review only changed lines:
-    # * omit the summary
-    args: [--diff=git diff HEAD, --no-summary]
+  - repo: https://github.com/sourcery-ai/sourcery
+    rev: v1.11.0
+    hooks:
+      - id: sourcery
+        # The best way to use Sourcery in a pre-commit hook:
+        # * review only changed lines:
+        # * omit the summary
+        args: [--diff=git diff HEAD, --no-summary]
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -15,7 +15,6 @@ To make a contribution, follow the following steps:
 
 For more details about pull requests, please read [GitHub's guides](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
 
-
 ### 📦 Package manager
 
 We use `poetry` as our package manager. You can install poetry by following the instructions [here](https://python-poetry.org/docs/#installation).
@@ -44,12 +43,12 @@ ruff pandasai examples
 
 Make sure that the linter does not report any errors or warnings before submitting a pull request.
 
-### Code Format with `black`
+### Code Format with `ruff-format`
 
-We use `black` to reformat the code by running the following command:
+We use `ruff` to reformat the code by running the following command:
 
 ```bash
-black pandasai 
+ruff format pandasai
 ```
 
 ### 🧪 Testing
@@ -62,8 +61,6 @@ poetry run pytest
 
 Make sure that all tests pass before submitting a pull request.
 
-
-
 ## 🚀 Release Process
 
 At the moment, the release process is manual. We try to make frequent releases. Usually, we release a new version when we have a new feature or bugfix. A developer with admin rights to the repository will create a new release on GitHub, and then publish the new version to PyPI.
diff --git a/examples/sql_direct_config.py b/examples/sql_direct_config.py
@@ -0,0 +1,49 @@
+"""Example of using PandasAI with PostgreSQL connectors and direct SQL."""
+
+from pandasai import SmartDatalake
+from pandasai.llm import OpenAI
+from pandasai.connectors import PostgreSQLConnector
+
+
+# With a PostgreSQL database
+payment_connector = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "orders",
+    }
+)
+
+order_details = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "order_details",
+    }
+)
+
+products = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "products",
+    }
+)
+
+
+llm = OpenAI("YOUR_API_KEY")
+df = SmartDatalake(
+    [order_details, payment_connector, products],
+    config={"llm": llm, "direct_sql": True},
+)
+response = df.chat("Return Orders with OrderDetails and counts of distinct Products")
+print(response)
diff --git a/examples/using_workspace_env.py b/examples/using_workspace_env.py
@@ -0,0 +1,38 @@
+import os
+import pandas as pd
+from pandasai import Agent
+
+from pandasai.llm.openai import OpenAI
+from pandasai.schemas.df_config import Config
+
+employees_data = {
+    "EmployeeID": [1, 2, 3, 4, 5],
+    "Name": ["John", "Emma", "Liam", "Olivia", "William"],
+    "Department": ["HR", "Sales", "IT", "Marketing", "Finance"],
+}
+
+salaries_data = {
+    "EmployeeID": [1, 2, 3, 4, 5],
+    "Salary": [5000, 6000, 4500, 7000, 5500],
+}
+
+employees_df = pd.DataFrame(employees_data)
+salaries_df = pd.DataFrame(salaries_data)
+
+
+os.environ["PANDASAI_WORKSPACE"] = "workspace dir path"
+
+
+llm = OpenAI("YOUR_API_KEY")
+config__ = {"llm": llm, "save_charts": False}
+
+
+agent = Agent(
+    [employees_df, salaries_df],
+    config=Config(**config__),
+    memory_size=10,
+)
+
+# Chat with the agent
+response = agent.chat("plot salary against department?")
+print(response)
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -42,7 +42,7 @@ nav:
       - Documents Building: building_docs.md
       - License: license.md
 extra:
-  version: "1.4.2"
+  version: "1.4.4"
 plugins:
   - search
   - mkdocstrings:

diff --git a/pandasai/agent/__init__.py b/pandasai/agent/__init__.py
@@ -92,6 +92,14 @@ def chat(self, query: str, output_type: Optional[str] = None):
                 f"\n{exception}\n"
             )
 
+    def add_message(self, message, is_user=False):
+        """
+        Add message to the memory. This is useful when you want to add a message
+        to the memory without calling the chat function (for example, when you
+        need to add a message from the agent).
+        """
+        self._lake._memory.add(message, is_user=is_user)
+
     def check_if_related_to_conversation(self, query: str) -> bool:
         """
         Check if the query is related to the previous conversation

diff --git a/pandasai/assets/prompt_templates/default_instructions.tmpl b/pandasai/assets/prompt_templates/default_instructions.tmpl
@@ -0,0 +1,5 @@
+Analyze the data, using the provided dataframes (`dfs`).
+    1. Prepare: Preprocessing and cleaning data if necessary
+    2. Process: Manipulating data for analysis (grouping, filtering, aggregating, etc.)
+    3. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)
+    {viz_library_type}
diff --git a/pandasai/assets/prompt_templates/direct_sql_connector.tmpl b/pandasai/assets/prompt_templates/direct_sql_connector.tmpl
@@ -0,0 +1,39 @@
+You are provided with the following samples of sql tables data:
+
+<Tables>
+{tables}
+</Tables>
+
+<conversation>
+{conversation}
+</conversation>
+
+You are provided with the following function that executes the sql query:
+<Function>
+def execute_sql_query(sql_query: str) -> pd.DataFrame
+"""This method connects to the database, executes the sql query and returns the dataframe"""
+</Function>
+
+This is the initial python function. Do not change the params.
+
+```python
+# TODO import all the dependencies required
+import pandas as pd
+
+def analyze_data() -> dict:
+    """
+    Analyze the data, using the provided dataframes (`dfs`).
+    1. Prepare: generate sql query to get data for analysis (grouping, filtering, aggregating, etc.)
+    2. Process: execute the query using the `execute_sql_query` function available to you, which returns a dataframe
+    3. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)
+    {viz_library_type}
+    At the end, return a dictionary of:
+    {output_type_hint}
+    """
+```
+
+Take a deep breath and reason step-by-step. Act as a senior data analyst.
+In the answer, you must never write the "technical" names of the tables.
+Based on the last message in the conversation:
+
+- return the updated analyze_data function wrapped within ```python ```
diff --git a/pandasai/assets/prompt_templates/generate_python_code.tmpl b/pandasai/assets/prompt_templates/generate_python_code.tmpl
@@ -6,8 +6,6 @@ You are provided with the following pandas DataFrames:
 {conversation}
 </conversation>
 
-{viz_library_type}
-
 This is the initial python function. Do not change the params. Given the context, use the right dataframes.
 ```python
 {current_code}

diff --git a/pandasai/assets/prompt_templates/viz_library.tmpl b/pandasai/assets/prompt_templates/viz_library.tmpl
@@ -0,0 +1 @@
+If the user requests to create a chart, utilize the Python {library} library to generate high-quality graphics that will be saved directly to a file.
diff --git a/pandasai/connectors/airtable.py b/pandasai/connectors/airtable.py
@@ -143,9 +143,7 @@ def execute(self):
         Returns:
             DataFrameType: The result of the connector.
         """
-        if cached := self._cached() or self._cached(
-            include_additional_filters=True
-        ):
+        if cached := self._cached() or self._cached(include_additional_filters=True):
             return pd.read_parquet(cached)
 
         if isinstance(self._instance, pd.DataFrame):

diff --git a/pandasai/connectors/databricks.py b/pandasai/connectors/databricks.py
@@ -63,3 +63,20 @@ def __repr__(self):
             f"host={self._config.host} port={self._config.port} "
             f"database={self._config.database} httpPath={str(self._config.httpPath)}"
         )
+
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
-            f"host={self._config.host} port={self._config.port} "
-            f"database={self._config.database} httpPath={str(self._config.httpPath)}"
-        )
-
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.token,
-                self._config.host,
-                self._config.port,
-                self._config.httpPath,
-            ) == (
-                other._config.dialect,
-                other._config.token,
-                other._config.host,
-                other._config.port,
-                other._config.httpPath,
-            )
-        return False
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
-            f"host={self._config.host} port={self._config.port} "
-            f"database={self._config.database} httpPath={str(self._config.httpPath)}"
-        )
-
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.token,
-                self._config.host,
-                self._config.port,
-                self._config.httpPath,
-            ) == (
-                other._config.dialect,
-                other._config.token,
-                other._config.host,
-                other._config.port,
-                other._config.httpPath,
-            )
-        return False
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
diff --git a/pandasai/connectors/snowflake.py b/pandasai/connectors/snowflake.py
@@ -90,3 +90,18 @@ def __repr__(self):
             f"database={self._config.database} schema={str(self._config.dbSchema)}  "
             f"table={self._config.table}>"
         )
+
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                self._config.password,
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                other._config.password,
+            )
+        return False
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.account,
-                self._config.username,
-                self._config.password,
-            ) == (
-                other._config.dialect,
-                other._config.account,
-                other._config.username,
-                other._config.password,
-            )
-        return False
+    def equals(self, other):
+        """
+        Compare the current object with another object for equality.
+
+        Args:
+            other: The object to compare with.
+
+        Returns:
+            True if the objects are equal, False otherwise.
+        """
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                hash(self._config.password),
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                hash(other._config.password),
+            )
+        return False
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.account,
-                self._config.username,
-                self._config.password,
-            ) == (
-                other._config.dialect,
-                other._config.account,
-                other._config.username,
-                other._config.password,
-            )
-        return False
+    def equals(self, other):
+        """
+        Compare the current object with another object for equality.
+
+        Args:
+            other: The object to compare with.
+
+        Returns:
+            True if the objects are equal, False otherwise.
+        """
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                hash(self._config.password),
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                hash(other._config.password),
+            )
+        return False
diff --git a/pandasai/connectors/sql.py b/pandasai/connectors/sql.py
@@ -5,6 +5,8 @@
 import re
 import os
 import pandas as pd
+
+from pandasai.exceptions import MaliciousQueryError
 from .base import BaseConnector, SQLConnectorConfig, SqliteConnectorConfig
 from .base import BaseConnectorConfig
 from sqlalchemy import create_engine, text, select, asc
@@ -246,9 +248,7 @@ def execute(self):
             DataFrame: The result of the SQL query.
         """
 
-        if cached := self._cached() or self._cached(
-            include_additional_filters=True
-        ):
+        if cached := self._cached() or self._cached(include_additional_filters=True):
             return pd.read_parquet(cached)
 
         if self.logger:
@@ -362,6 +362,46 @@ def column_hash(self):
     def fallback_name(self):
         return self._config.table
 
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.driver,
+                self._config.host,
+                self._config.port,
+                self._config.username,
+                self._config.password,
+            ) == (
+                other._config.dialect,
+                other._config.driver,
+                other._config.host,
+                other._config.port,
+                other._config.username,
+                other._config.password,
+            )
+        return False
+
+    def _is_sql_query_safe(self, query: str):
+        infected_keywords = [
+            r"\bINSERT\b",
+            r"\bUPDATE\b",
+            r"\bDELETE\b",
+            r"\bDROP\b",
+            r"\bEXEC\b",
+            r"\bALTER\b",
+            r"\bCREATE\b",
+        ]
+
+        return not any(
+            re.search(keyword, query, re.IGNORECASE) for keyword in infected_keywords
+        )
+
+    def execute_direct_sql_query(self, sql_query):
+        if not self._is_sql_query_safe(sql_query):
+            raise MaliciousQueryError("Malicious query is generated in code")
+
+        return pd.read_sql(sql_query, self._connection)
+
 
 class SqliteConnector(SQLConnector):
     """
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		If the user requests to create a chart, utilize the Python {library} library to generate high-quality graphics that will be saved directly to a file.