getredash · rameshramachandran2 · Feb 6, 2024 · Feb 7, 2024 · Feb 7, 2024
diff --git a/client/app/assets/images/db-logos/s3.png b/client/app/assets/images/db-logos/s3.png
diff --git a/client/app/assets/images/db-logos/splunk.png b/client/app/assets/images/db-logos/splunk.png
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -85,6 +85,7 @@ werkzeug = "2.3.8"
 wtforms = "2.2.1"
 xlsxwriter = "1.2.2"
 tzlocal = "4.3.1"
+splunk-sdk = "^1.7.4"
 
 [tool.poetry.group.all_ds]
 optional = true

diff --git a/redash/query_runner/s3.py b/redash/query_runner/s3.py
@@ -0,0 +1,114 @@
+import boto3
+import pandas as pd
+from redash.query_runner import BaseQueryRunner, register
+from redash.query_runner import TYPE_STRING, TYPE_INTEGER, TYPE_BOOLEAN, TYPE_FLOAT, TYPE_DATE, TYPE_DATETIME
+from redash.utils import json_dumps, json_loads
+import logging
+
+TYPES_MAP = {  # keyed by str(<pandas column dtype>); values are Redash column type constants
+    "bool": TYPE_BOOLEAN,
+    "datetime64[ns]": TYPE_DATETIME,
+    "datetime64[s]": TYPE_DATETIME,
+    "float64": TYPE_FLOAT,
+    "int64": TYPE_INTEGER,
+    "object": TYPE_STRING
+}
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+class S3(BaseQueryRunner):
+    """Query runner that runs S3 Select SQL against one configured CSV object."""
+
+    @classmethod
+    def name(cls):
+        """Return the display name of this data source type."""
+        return "Amazon S3"
+
+    @classmethod
+    def configuration_schema(cls):
+        """Return the JSON schema describing the data source settings."""
+        return {
+            "type": "object",
+            "properties": {
+                "region": {"type": "string", "title": "AWS Region"},
+                "bucket_name": {"type": "string", "title": "Bucket Name"},
+                "object_key": {"type": "string", "title": "Object Key"},
+            },
+            "required": ["region", "bucket_name", "object_key"],
+            "order": ["region", "bucket_name", "object_key"],
+        }
+
+    def _select(self, expression):
+        """Run ``expression`` through S3 Select and return the raw JSON bytes.
+
+        The object is read as CSV whose first row is the header. All
+        ``Records`` events are concatenated because S3 streams large results
+        in several events and a record may be split across two of them.
+        """
+        # Pass the configured region explicitly; it used to be read from the
+        # configuration and then ignored.
+        s3_client = boto3.client("s3", region_name=self.configuration["region"])
+        resp = s3_client.select_object_content(
+            Bucket=self.configuration["bucket_name"],
+            Key=self.configuration["object_key"],
+            ExpressionType="SQL",
+            Expression=expression,
+            InputSerialization={"CSV": {"FileHeaderInfo": "Use"}, "CompressionType": "NONE"},
+            # One JSON document per line, so the payload can be split on "\n".
+            OutputSerialization={"JSON": {"RecordDelimiter": "\n"}},
+        )
+        return b"".join(
+            event["Records"]["Payload"] for event in resp["Payload"] if "Records" in event
+        )
+
+    @staticmethod
+    def _parse_records(payload):
+        """Decode an S3 Select JSON payload (bytes) into a list of row dicts.
+
+        Each line is parsed on its own rather than rewriting "}{" in the raw
+        text, which corrupted values containing that sequence. Carriage
+        returns left over from CRLF-terminated CSV files are removed from keys
+        and string values after parsing, so backslashes in the data survive.
+        """
+
+        def strip_cr(value):
+            return value.replace("\r", "") if isinstance(value, str) else value
+
+        rows = []
+        for line in payload.decode("utf8").split("\n"):
+            if line.strip():
+                record = json_loads(line)
+                rows.append({strip_cr(k): strip_cr(v) for k, v in record.items()})
+        return rows
+
+    def test_connection(self):
+        """Check that the configured object can be read through S3 Select.
+
+        Only one record is requested so large objects are not fully scanned.
+        Errors from boto3 (bad bucket, key, region or credentials) propagate.
+        """
+        self._select("SELECT * FROM S3Object LIMIT 1")
+
+    def run_query(self, query, user):
+        """Run ``query`` against the configured object.
+
+        Returns a ``(json_data, error)`` tuple in the Redash result format.
+        ``error`` is always ``None``; failures propagate as exceptions.
+        """
+        df = pd.DataFrame(self._parse_records(self._select(query)))
+        logger.debug("S3 Select returned %d rows", len(df))
+
+        columns = [
+            {
+                "name": col,
+                "friendly_name": col,
+                # Unmapped dtypes fall back to string instead of raising KeyError.
+                "type": TYPES_MAP.get(str(df[col].dtype), TYPE_STRING),
+            }
+            for col in df.columns
+        ]
+        data = {"columns": columns, "rows": df.to_dict("records")}
+        return json_dumps(data), None
+
+# Register the S3 query runner through redash.query_runner.register at import time.
+register(S3)
diff --git a/redash/query_runner/splunk.py b/redash/query_runner/splunk.py
@@ -0,0 +1,121 @@
+import pandas as pd
+from redash.query_runner import BaseQueryRunner, register
+from redash.query_runner import TYPE_STRING, TYPE_INTEGER, TYPE_BOOLEAN, TYPE_FLOAT, TYPE_DATE, TYPE_DATETIME
+from redash.utils import json_dumps, json_loads
+import splunklib.client  as client
+import splunklib.results as results
+import requests
+import json
+import time
+import logging
+from redash.query_runner import *
+
+TYPES_MAP = {  # keyed by str(<pandas column dtype>); values are Redash column type constants
+    "bool": TYPE_BOOLEAN,
+    "datetime64[ns]": TYPE_DATETIME,
+    "datetime64[s]": TYPE_DATETIME,
+    "float64": TYPE_FLOAT,
+    "int64": TYPE_INTEGER,
+    "object": TYPE_STRING
+}
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+class Splunk(BaseQueryRunner):
+    """Query runner that executes one-shot Splunk searches via splunk-sdk."""
+
+    # The annotation comment must not reach Splunk (the old code cut it off), so
+    # opt out of it. NOTE(review): confirm BaseQueryRunner honours this flag.
+    should_annotate_query = False
+
+    @classmethod
+    def configuration_schema(cls):
+        """Return the JSON schema describing the data source settings."""
+        return {
+            "type": "object",
+            "properties": {
+                "splunk_url": {"type": "string", "title": "Splunk Server URL"},
+                "username": {"type": "string", "title": "Username"},
+                "password": {"type": "string", "title": "Password", "secret": True},
+            },
+            "required": ["splunk_url", "username", "password"],
+            "order": ["splunk_url", "username", "password"],
+        }
+
+    def _connect(self):
+        """Open an authenticated Splunk session from the stored configuration.
+
+        ``splunk_url`` may be a bare host name (as before) or a full URL such
+        as ``https://splunk.example.com:8089``; scheme and port are forwarded
+        to the SDK only when the URL provides them.
+        """
+        from urllib.parse import urlsplit
+
+        target = self.configuration.get("splunk_url") or None
+        options = {}
+        if target and "://" in target:
+            parts = urlsplit(target)
+            target = parts.hostname
+            if parts.scheme:
+                options["scheme"] = parts.scheme
+            if parts.port:
+                options["port"] = parts.port
+        return client.connect(
+            host=target,
+            username=(self.configuration.get("username") or None),
+            password=(self.configuration.get("password") or None),
+            **options,
+        )
+
+    @staticmethod
+    def _strip_annotation(query):
+        """Drop a leading ``/* ... */`` annotation comment, if one is present.
+
+        Unlike ``query.split("*/ ", 1)[1]``, this accepts unannotated queries.
+        """
+        stripped = query.lstrip()
+        if stripped.startswith("/*"):
+            _, closer, rest = stripped.partition("*/")
+            if closer:
+                return rest.lstrip()
+        return query
+
+    def test_connection(self):
+        """Log in to Splunk and log out again; raises if the login fails."""
+        self._connect().logout()
+
+    def run_query(self, query, user):
+        """Run ``query`` as a one-shot search.
+
+        Returns ``(json_data, None)`` on success or ``(None, message)`` when
+        Splunk rejects the login or the search.
+        """
+        query = self._strip_annotation(query)
+        logger.debug("Query: %s", query)
+        json_data, error, service = None, None, None
+        try:
+            service = self._connect()
+            reader = results.ResultsReader(service.jobs.oneshot(query))
+            # The reader also yields diagnostic Message objects; keep rows only.
+            df = pd.DataFrame([item for item in reader if isinstance(item, dict)])
+            logger.debug("Splunk returned %d rows", len(df))
+            columns = [
+                {
+                    "name": col,
+                    "friendly_name": col,
+                    # Unmapped dtypes fall back to string instead of KeyError.
+                    "type": TYPES_MAP.get(str(df[col].dtype), TYPE_STRING),
+                }
+                for col in df.columns
+            ]
+            json_data = json_dumps({"columns": columns, "rows": df.to_dict("records")})
+        except (client.HTTPError, SyntaxError, RuntimeError) as e:
+            # Python 3 exceptions have no ``.message``; use str() instead.
+            error = str(e)
+        finally:
+            if service is not None:
+                service.logout()
+
+        return json_data, error
+
+register(Splunk)  # register the Splunk runner through redash.query_runner.register at import time
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
@@ -280,6 +280,7 @@ def email_server_is_configured():
     "redash.query_runner.google_spreadsheets",
     "redash.query_runner.graphite",
     "redash.query_runner.mongodb",
+    "redash.query_runner.s3",
     "redash.query_runner.couchbase",
     "redash.query_runner.mysql",
     "redash.query_runner.pg",
@@ -298,7 +299,7 @@ def email_server_is_configured():
     "redash.query_runner.vertica",
     "redash.query_runner.clickhouse",
     "redash.query_runner.tinybird",
-    "redash.query_runner.yandex_metrica",
+    #"redash.query_runner.yandex_metrica",
     "redash.query_runner.yandex_disk",
     "redash.query_runner.rockset",
     "redash.query_runner.treasuredata",
@@ -338,6 +339,7 @@ def email_server_is_configured():
     "redash.query_runner.google_search_console",
     "redash.query_runner.ignite",
     "redash.query_runner.oracle",
+    "redash.query_runner.splunk",
     "redash.query_runner.e6data",
 ]