From 3093d8777d16e814c72bb7744a215cd3d0a805b8 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Thu, 25 Apr 2024 08:34:20 +0530 Subject: [PATCH 01/13] python sdk init (#1) --- .gitignore | 18 + obsrv/__init__.py | 0 obsrv/common/__init__.py | 1 + obsrv/common/exception.py | 13 + obsrv/connector/__init__.py | 2 + obsrv/connector/batch/__init__.py | 2 + obsrv/connector/batch/obsrv_dataset.py | 73 +++ obsrv/connector/batch/source.py | 200 +++++++ obsrv/connector/metrics_collector.py | 36 ++ obsrv/connector/registry.py | 148 ++++++ obsrv/job/batch/__init__.py | 1 + obsrv/job/batch/utils.py | 29 + obsrv/models/__init__.py | 2 + obsrv/models/data_models.py | 46 ++ obsrv/models/metric.py | 62 +++ obsrv/utils/__init__.py | 5 + obsrv/utils/config.py | 20 + obsrv/utils/db_util.py | 47 ++ obsrv/utils/encyption.py | 22 + obsrv/utils/json_util.py | 26 + obsrv/utils/logger.py | 21 + obsrv/utils/time.py | 15 + poetry.lock | 708 +++++++++++++++++++++++++ pyproject.toml | 27 + tests/__init__.py | 0 tests/create_tables.py | 121 +++++ tests/sample_data/nyt_data_100.json | 100 ++++ tests/sample_data/nyt_data_100.json.gz | Bin 0 -> 10228 bytes tests/test_batch_connector.py | 126 +++++ tests/test_conf.yaml | 21 + tests/test_connector_registry.py | 26 + tests/test_encryption_utils.py | 36 ++ tests/test_setup.py | 24 + 33 files changed, 1978 insertions(+) create mode 100644 .gitignore create mode 100644 obsrv/__init__.py create mode 100644 obsrv/common/__init__.py create mode 100644 obsrv/common/exception.py create mode 100644 obsrv/connector/__init__.py create mode 100644 obsrv/connector/batch/__init__.py create mode 100644 obsrv/connector/batch/obsrv_dataset.py create mode 100644 obsrv/connector/batch/source.py create mode 100644 obsrv/connector/metrics_collector.py create mode 100644 obsrv/connector/registry.py create mode 100644 obsrv/job/batch/__init__.py create mode 100644 obsrv/job/batch/utils.py create mode 100644 obsrv/models/__init__.py create mode 100644 
class ObsrvException(Exception):
    """Base exception for SDK failures, carrying a structured error payload.

    Args:
        error: Normally an ``ErrorData``-style object exposing ``error_msg``.
            Any other value (for example a plain string) is also accepted and
            is stringified to build the exception message.

    Attributes:
        error: The object passed in, kept as-is so handlers can inspect it.
    """

    def __init__(self, error):
        self.error = error
        # Fall back to str(error) so that constructing the exception can never
        # raise AttributeError itself and hide the failure being reported.
        super().__init__(getattr(error, "error_msg", str(error)))
class ObsrvDataset:
    """Prepare a connector's Spark DataFrame for ingestion and publish it to Kafka.

    Attributes:
        ds: The working DataFrame; rewritten in place by the methods below.
        invalid_events: Rows whose JSON form exceeds the configured size limit
            (``None`` until ``filter_events`` has run).
        valid_events: Rows within the size limit (``None`` until
            ``filter_events`` has run).
    """

    def __init__(self, ds: DataFrame):
        self.ds = ds
        self.invalid_events = None
        self.valid_events = None

    def filter_events(self, ctx, config):
        """Split ``self.ds`` into ``valid_events`` and ``invalid_events`` by size.

        The size of a row is the length of its JSON serialisation; the limit is
        read from ``kafka.producer.max-request-size`` (default 1000000).

        Args:
            ctx: Connector context; accepted for interface symmetry, not used.
            config: Object exposing ``find(path, default)``.
        """
        max_event_size = config.find("kafka.producer.max-request-size", 1000000)
        # The helper column stays on self.ds (as before); it is dropped only
        # from the two derived frames.
        self.ds = self.ds.withColumn("_obsrv_tmp_size", length(to_json(struct("*"))))
        self.invalid_events = self.ds.filter(self.ds._obsrv_tmp_size > max_event_size).drop("_obsrv_tmp_size")
        self.valid_events = self.ds.filter(self.ds._obsrv_tmp_size <= max_event_size).drop("_obsrv_tmp_size")

    def append_obsrv_meta(self, ctx):
        """Add an ``obsrv_meta`` struct column describing the sync and its source.

        If the frame has an ``_addn_source_meta`` column, the value from its
        first row is copied into ``obsrv_meta.source`` (double quotes replaced
        by single quotes) and the column is dropped.

        Args:
            ctx: Connector context providing ``connector_id`` and
                ``connector_instance_id``.
        """
        addn_meta_data = None

        source_meta = [
            StructField("connector", StringType(), True),
            StructField("connectorInstance", StringType(), True)
        ]
        if "_addn_source_meta" in self.ds.columns:
            source_meta.append(StructField("_addn_source_meta", StringType(), True))
            # first() fetches a single row instead of collecting the whole
            # column to the driver, and returns None for an empty frame rather
            # than failing with IndexError.
            first_row = self.ds.select("_addn_source_meta").first()
            if first_row is not None and first_row[0] is not None:
                addn_meta_data = first_row[0].replace('"', "'")
            self.ds = self.ds.drop("_addn_source_meta")

        obsrv_meta_schema = StructType(
            [
                StructField("syncts", StringType(), True),
                StructField("flags", StringType(), True),
                StructField("timespans", StringType(), True),
                StructField("error", StringType(), True),
                StructField("source", StructType(source_meta), True)
            ]
        )

        syncts = int(time.time() * 1000)  # epoch milliseconds
        obsrv_meta = {
            "syncts": syncts,
            "flags": {},
            "timespans": {},
            "error": {},
            "source": {
                "connector": ctx.connector_id,
                "connectorInstance": ctx.connector_instance_id
            }
        }

        if addn_meta_data is not None:
            obsrv_meta["source"]["_addn_source_meta"] = addn_meta_data

        # The metadata is identical for every row, so it is attached as a
        # literal JSON string parsed against the schema above.
        obsrv_meta_struct = from_json(lit(json.dumps(obsrv_meta)), obsrv_meta_schema)
        self.ds = self.ds.withColumn("obsrv_meta", obsrv_meta_struct)

    def save_to_kafka(self, config, topic):
        """Write ``valid_events`` to ``topic`` as one JSON message per row.

        Requires ``filter_events`` to have run so that ``valid_events`` is set.

        Args:
            config: Object exposing ``find(path, default)``; supplies
                ``kafka.bootstrap-servers`` and ``kafka.producer.compression``.
            topic: Destination Kafka topic name.
        """
        kafka_servers = config.find("kafka.bootstrap-servers", "localhost:9092")
        compression_type = config.find("kafka.producer.compression", "snappy")

        logger.info("saving valid events to kafka topic `%s` on `%s`", topic, kafka_servers)

        self.valid_events.selectExpr("to_json(struct(*)) AS value").write.format("kafka") \
            .option("kafka.bootstrap.servers", kafka_servers) \
            .option("kafka.compression.type", compression_type) \
            .option("topic", topic)\
            .save()
class ISourceConnector(ABC):
    """Contract every batch source connector implements.

    The framework calls ``get_spark_conf`` to build the Spark session and then
    ``execute``, which delegates to the connector's ``process``.
    """

    @final
    def execute(self, ctx, connector_config, sc, metrics_collector) -> Any:
        """Run the connector and return whatever ``process`` produces.

        Args:
            ctx: Connector context for this run.
            connector_config: Connector-specific configuration.
            sc: Spark session to read with.
            metrics_collector: Collector the connector may record metrics on.

        Returns:
            The value returned by ``process``; the framework handles either a
            single DataFrame or an iterable of DataFrames.
        """
        return self.process(sc, ctx, connector_config, metrics_collector)

    @abstractmethod
    def get_spark_conf(self, connector_config) -> "SparkConf":
        """Return the Spark configuration this connector needs."""
        pass

    @abstractmethod
    def process(self, sc, ctx, connector_config, metrics_collector) -> Any:
        """Read from the source and return the data to ingest.

        The parameter list matches the call made by ``execute``; the previous
        three-argument declaration led implementers to a signature that failed
        with TypeError when invoked.
        """
        pass
@final
def process_connector(connector: ISourceConnector, ctx: ConnectorContext, connector_config: Dict[Any, Any], config: Dict[Any, Any], sc: SparkSession, metrics_collector: MetricsCollector) -> ExecutionMetric:
    """Execute the connector and push every DataFrame it produces through the framework.

    Defined on ``SourceConnector`` and called through the class (no ``self``).
    The connector may hand back one DataFrame or any iterable of DataFrames;
    an iterable is consumed lazily, one frame at a time.

    Returns:
        An ``ExecutionMetric`` with the accumulated record counts and framework
        time. ``connectorExecTime`` and ``totalExecTime`` are left at 0 for the
        caller to fill in.
    """
    results = connector.execute(ctx=ctx, connector_config=connector_config, sc=sc, metrics_collector=metrics_collector)

    if isinstance(results, DataFrame):
        frames = [results]
        count_message = "processing single dataframe with %s records"
    else:
        logger.info("processing iterator in process_connector")
        frames = results
        count_message = "processing dataframe from generator with %s records"

    success_total, failure_total, framework_seconds = 0, 0, 0
    for frame in frames:
        logger.info(count_message, frame.count())
        outcome = SourceConnector.process_result(frame, ctx, config)
        success_total += outcome[0]
        failure_total += outcome[1]
        framework_seconds += outcome[2]

    return ExecutionMetric(
        totalRecords=success_total + failure_total,
        failedRecords=failure_total,
        successRecords=success_total,
        connectorExecTime=0,
        frameworkExecTime=framework_seconds,
        totalExecTime=0,
    )
@final
def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs):
    """Run one batch connector end to end and publish its execution metrics.

    Defined on ``SourceConnector`` and called through the class (no ``self``).
    Loads the config file, looks up the connector instance, builds the Spark
    session, runs the connector, and finally writes the collected metrics to
    the ``kafka.connector-metrics-topic`` topic before stopping Spark.

    Args:
        connector: The connector implementation to run.
        config_file_path: Path to the YAML configuration file.
        **kwargs: Accepted for forward compatibility; not used.

    Raises:
        Exception: If no connector instance matches ``connector_instance_id``.
        ObsrvException: If the Spark session cannot be created or the
            connector run fails (the original error is chained as the cause).
    """
    start_time = time.time()
    config = Config(config_file_path)
    connector_instance_id = config.find("connector_instance_id")
    connector_instance = SourceConnector.get_connector_instance(connector_instance_id, config.find("postgres"))

    if connector_instance is None:
        raise Exception("Connector instance not found")

    ctx = connector_instance.connector_context
    # TODO: Move this to separate method
    ctx.building_block = config.find("building-block", None)
    ctx.env = config.find("env", None)
    connector_config = SourceConnector.get_connector_config(connector_instance)
    # NOTE(review): connector_config is indexed like a mapping here but is then
    # handed to decrypt(), which base64-decodes its argument - confirm the
    # expected shape of an encrypted config.
    if 'is_encrypted' in connector_config and connector_config['is_encrypted']:
        encryption_util = EncryptionUtil(config.find("obsrv_encryption_key"))
        connector_config = encryption_util.decrypt(connector_config)

    metrics_collector = MetricsCollector(ctx)
    sc = SourceConnector.get_spark_session(ctx, connector_config, connector.get_spark_conf(connector_config))
    if sc is None:
        # get_spark_session logs and returns None on failure; fail here with a
        # clear error instead of an AttributeError on None further down.
        raise ObsrvException(ErrorData("CONNECTOR_PROCESS_ERR", "error processing connector: unable to create spark session"))
    connector_processing_start = time.time()

    try:
        execution_metric = SourceConnector.process_connector(connector=connector, ctx=ctx, connector_config=connector_config, config=config, sc=sc, metrics_collector=metrics_collector)
        end_time = time.time()

        # NOTE(review): these durations are time.time() differences (seconds),
        # while ExecutionMetric.to_json() emits them under *_ms keys - confirm
        # the intended unit before changing either side.
        metric_event = ExecutionMetric(
            totalRecords=execution_metric.totalRecords,
            failedRecords=execution_metric.failedRecords,
            successRecords=execution_metric.successRecords,
            connectorExecTime=end_time - connector_processing_start,
            frameworkExecTime=execution_metric.frameworkExecTime + end_time - start_time,
            totalExecTime=end_time - start_time
        )
        logger.info("Metrics: %s", metric_event.to_json())
        metrics_collector.collect(metric=metric_event.to_json())

    except Exception as e:
        logger.exception(f"error processing connector: {str(e)}")
        # The exception was previously constructed but never raised, so a
        # failed run looked successful to the caller.
        raise ObsrvException(ErrorData("CONNECTOR_PROCESS_ERR", f"error processing connector: {str(e)}")) from e

    finally:
        try:
            kafka_servers = config.find("kafka.bootstrap-servers", "localhost:9092")
            compression_type = config.find("kafka.producer.compression", "snappy")

            sc.createDataFrame(metrics_collector.to_seq(), SourceConnector.get_metrics_schema()) \
                .selectExpr("to_json(struct(*)) AS value").write.format("kafka") \
                .option("kafka.bootstrap.servers", kafka_servers) \
                .option("kafka.compression.type", compression_type) \
                .option("topic", config.find("kafka.connector-metrics-topic")) \
                .save()
        finally:
            # Always release the Spark session, even if the metrics write fails.
            sc.stop()
@dataclass
class ConnectorContext:
    """Identity and runtime handles of one connector instance.

    Built by ``parse_connector_instance`` from a ``connector_instances`` row
    joined with its dataset.
    """
    connector_id: str
    dataset_id: str
    connector_instance_id: str
    connector_type: str
    # data_format: str
    # Kafka topic events are written to; parse_connector_instance reads it from
    # the dataset config.
    entry_topic: Optional[str] = None
    # building_block and env are filled in by SourceConnector.process from the
    # job's config file.
    building_block: Optional[str] = None
    env: Optional[str] = None
    # Persisted connector state and stats, each able to save itself back to
    # Postgres.
    state: Optional['ConnectorState'] = None
    stats: Optional['ConnectorStats'] = None
class ConnectorRegistry:
    """Read and update ``connector_instances`` rows in Postgres."""

    @staticmethod
    def _sql_literal(value):
        """Return ``value`` as text that is safe between single quotes in SQL.

        Single quotes are doubled, which is how a quote is written inside a
        standard SQL string constant. Without this, a quote inside the state or
        stats JSON (or an id) ends the literal early and breaks - or rewrites -
        the statement.

        NOTE(review): this assumes ``standard_conforming_strings`` is on (the
        PostgreSQL default); bound query parameters would be the more robust
        fix once the DB helper accepts them.
        """
        return str(value).replace("'", "''")

    @staticmethod
    def get_connector_instances(connector_id, postgres_config):
        """Return all connector instances of ``connector_id`` whose instance and dataset are both 'Live'."""
        postgres_connect = PostgresConnect(postgres_config)
        query = """
            SELECT ci.*, d.dataset_config
            FROM connector_instances as ci
            JOIN datasets d ON ci.dataset_id = d.id
            WHERE ci.connector_id = '{}' AND d.status = 'Live' AND ci.status = 'Live'
        """.format(ConnectorRegistry._sql_literal(connector_id))
        result = postgres_connect.execute_select_all(query)
        return [parse_connector_instance(row, postgres_config) for row in result]

    @staticmethod
    def get_connector_instance(connector_instance_id, postgres_config):
        """Return the 'Live' connector instance with this id, or ``None`` if there is none."""
        postgres_connect = PostgresConnect(postgres_config)
        query = """
            SELECT ci.*, d.dataset_config
            FROM connector_instances as ci
            JOIN datasets d ON ci.dataset_id = d.id
            WHERE ci.id = '{}' AND d.status = 'Live' AND ci.status = 'Live'
        """.format(ConnectorRegistry._sql_literal(connector_instance_id))
        result = postgres_connect.execute_select_one(query)
        return parse_connector_instance(result, postgres_config) if result else None

    @staticmethod
    def update_connector_state(connector_instance_id, postgres_config, state):
        """Store ``state`` (a JSON string) for the instance; return the number of rows updated."""
        postgres_connect = PostgresConnect(postgres_config)
        query = """
            UPDATE connector_instances SET connector_state = '{}' WHERE id = '{}'
        """.format(
            ConnectorRegistry._sql_literal(state),
            ConnectorRegistry._sql_literal(connector_instance_id),
        )
        return postgres_connect.execute_upsert(query)

    @staticmethod
    def update_connector_stats(connector_instance_id, postgres_config, stats):
        """Store ``stats`` (a JSON string) for the instance; return the number of rows updated."""
        postgres_connect = PostgresConnect(postgres_config)
        query = """
            UPDATE connector_instances SET connector_stats = '{}' WHERE id = '{}'
        """.format(
            ConnectorRegistry._sql_literal(stats),
            ConnectorRegistry._sql_literal(connector_instance_id),
        )
        return postgres_connect.execute_upsert(query)
def get_base_conf() -> SparkConf:
    """Build the baseline ``SparkConf`` shared by batch connector jobs.

    The master URL and the Kafka SQL package are deliberately not set here.

    Returns:
        A new ``SparkConf`` with the settings below applied in order.
    """
    base_settings = (
        # Output committer and speculation
        ("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2"),
        ("spark.speculation", "false"),
        ("spark.hadoop.mapreduce.map.speculative", "false"),
        ("spark.hadoop.mapreduce.reduce.speculative", "false"),
        # File sources
        ("spark.sql.parquet.filterPushdown", "true"),
        ("spark.sql.sources.partitionOverwriteMode", "dynamic"),
        ("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true"),
        # Arrow-based PySpark conversion
        ("spark.sql.execution.arrow.pyspark.enabled", "true"),
        # Timeouts
        ("spark.executor.heartbeatInterval", "60s"),
        ("spark.network.timeout", "600s"),
        # Parallelism
        ("spark.sql.shuffle.partitions", "200"),
        ("spark.default.parallelism", "200"),
        # Session behaviour
        ("spark.sql.session.timeZone", "UTC"),
        ("spark.sql.catalogImplementation", "hive"),
        ("spark.sql.sources.partitionColumnTypeInference.enabled", "false"),
        # Tolerance for cleanup failures and corrupt files
        ("spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored", "true"),
        ("spark.hadoop.parquet.enable.summary-metadata", "false"),
        ("spark.sql.sources.ignoreCorruptFiles", "true"),
        # Adaptive execution and legacy time parsing
        ("spark.sql.adaptive.enabled", "true"),
        ("spark.sql.legacy.timeParserPolicy", "LEGACY"),
    )

    spark_conf = SparkConf()
    for option, setting in base_settings:
        spark_conf.set(option, setting)
    return spark_conf
class Metric:
    """One metric event: envelope fields plus a context and a data payload.

    ``context`` and ``edata`` must provide ``to_json()``; the remaining fields
    are emitted as given.
    """

    def __init__(self, eid, ets, mid, actor, context, object, edata):
        self.eid, self.ets, self.mid = eid, ets, mid
        self.actor = actor
        self.context = context
        self.object = object
        self.edata = edata

    def to_json(self):
        """Return the event as a plain dict, serialising ``context`` and ``edata``."""
        event = {field: getattr(self, field) for field in ("eid", "ets", "mid", "actor")}
        event["context"] = self.context.to_json()
        event["object"] = self.object
        event["edata"] = self.edata.to_json()
        return event
class PostgresConnect:
    """Small helper that runs one statement per short-lived Postgres connection.

    Args:
        config: Mapping with ``host``, ``port``, ``user``, ``password`` and
            ``dbname`` entries.
    """

    def __init__(self, config):
        self.config = config
        self.logger = logging.getLogger(__name__)

    def connect(self):
        """Open and return a new autocommit connection built from ``self.config``."""
        # TODO: Read the values from config file
        db_host = self.config.get("host")
        db_port = self.config.get("port")
        db_user = self.config.get("user")
        db_password = self.config.get("password")
        database = self.config.get("dbname")
        db_connection = psycopg2.connect(
            database=database,
            host=db_host, port=db_port,
            user=db_user, password=db_password
        )
        db_connection.autocommit = True
        return db_connection

    def execute_select_one(self, sql, params=None):
        """Run ``sql`` and return the first row as a dict, or ``None`` if no row matched.

        Args:
            sql: Statement text.
            params: Optional values bound to placeholders in ``sql``.
        """
        db_connection = self.connect()
        try:
            cursor = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute(sql, params)
            row = cursor.fetchone()
            # fetchone() yields None when nothing matched; dict(None) used to
            # raise TypeError before callers could test for a missing row.
            return dict(row) if row is not None else None
        finally:
            # Close even when execute() raises, so failed queries do not leak
            # connections.
            db_connection.close()

    def execute_select_all(self, sql, params=None):
        """Run ``sql`` and return every row as a list of dicts (empty if none matched)."""
        db_connection = self.connect()
        try:
            cursor = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute(sql, params)
            return [dict(r) for r in cursor.fetchall()]
        finally:
            db_connection.close()

    def execute_upsert(self, sql, params=None):
        """Run a data-modifying statement and return the cursor's affected-row count."""
        db_connection = self.connect()
        try:
            cursor = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cursor.execute(sql, params)
            return cursor.rowcount
        finally:
            db_connection.close()
def time_it(func):
    """Decorate *func* so each call also reports how long the call took.

    The wrapped callable returns:

    * ``(elapsed_seconds, result)`` when *func* returns a non-iterator
      value;
    * a generator yielding exactly one ``(elapsed_seconds, result)`` pair
      when *func* returns an iterator (for example when *func* is a
      generator function).

    Only the call ``func(*args, **kwargs)`` is timed. For an iterator
    result this is the time taken to create the iterator, not the time
    taken to consume it.

    Previously the wrapper itself contained ``yield`` and was therefore
    always a generator function, so the non-iterator branch never handed
    the tuple back to the caller; the two cases are now handled by
    separate code paths.
    """
    # Local import: the enclosing module only imports ``time`` and
    # ``typing.Iterator`` at file level.
    from functools import wraps

    def _yield_once(elapsed_time, result):
        # Preserve the generator-shaped return for iterator results.
        yield elapsed_time, result

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the difference cannot go negative
        # if the system clock is adjusted during the call.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_time = time.perf_counter() - start_time

        if isinstance(result, Iterator):
            return _yield_once(elapsed_time, result)
        return elapsed_time, result

    return wrapper
Poetry 1.8.2 and should not be changed by hand. + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file 
= "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.4.3" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8580b827d4746d47294c0e0b92854c85a92c2227927433998f0d3320ae8a71b6"}, + {file = "coverage-7.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:718187eeb9849fc6cc23e0d9b092bc2348821c5e1a901c9f8975df0bc785bfd4"}, + {file = "coverage-7.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:767b35c3a246bcb55b8044fd3a43b8cd553dd1f9f2c1eeb87a302b1f8daa0524"}, + {file = "coverage-7.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae7f19afe0cce50039e2c782bff379c7e347cba335429678450b8fe81c4ef96d"}, + {file = "coverage-7.4.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba3a8aaed13770e970b3df46980cb068d1c24af1a1968b7818b69af8c4347efb"}, + {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ee866acc0861caebb4f2ab79f0b94dbfbdbfadc19f82e6e9c93930f74e11d7a0"}, + {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:506edb1dd49e13a2d4cac6a5173317b82a23c9d6e8df63efb4f0380de0fbccbc"}, + {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd6545d97c98a192c5ac995d21c894b581f1fd14cf389be90724d21808b657e2"}, + {file = "coverage-7.4.3-cp310-cp310-win32.whl", hash = "sha256:f6a09b360d67e589236a44f0c39218a8efba2593b6abdccc300a8862cffc2f94"}, + {file = 
"coverage-7.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:18d90523ce7553dd0b7e23cbb28865db23cddfd683a38fb224115f7826de78d0"}, + {file = "coverage-7.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cbbe5e739d45a52f3200a771c6d2c7acf89eb2524890a4a3aa1a7fa0695d2a47"}, + {file = "coverage-7.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:489763b2d037b164846ebac0cbd368b8a4ca56385c4090807ff9fad817de4113"}, + {file = "coverage-7.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:451f433ad901b3bb00184d83fd83d135fb682d780b38af7944c9faeecb1e0bfe"}, + {file = "coverage-7.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fcc66e222cf4c719fe7722a403888b1f5e1682d1679bd780e2b26c18bb648cdc"}, + {file = "coverage-7.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ec74cfef2d985e145baae90d9b1b32f85e1741b04cd967aaf9cfa84c1334f3"}, + {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:abbbd8093c5229c72d4c2926afaee0e6e3140de69d5dcd918b2921f2f0c8baba"}, + {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:35eb581efdacf7b7422af677b92170da4ef34500467381e805944a3201df2079"}, + {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8249b1c7334be8f8c3abcaaa996e1e4927b0e5a23b65f5bf6cfe3180d8ca7840"}, + {file = "coverage-7.4.3-cp311-cp311-win32.whl", hash = "sha256:cf30900aa1ba595312ae41978b95e256e419d8a823af79ce670835409fc02ad3"}, + {file = "coverage-7.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:18c7320695c949de11a351742ee001849912fd57e62a706d83dfc1581897fa2e"}, + {file = "coverage-7.4.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b51bfc348925e92a9bd9b2e48dad13431b57011fd1038f08316e6bf1df107d10"}, + {file = "coverage-7.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:d6cdecaedea1ea9e033d8adf6a0ab11107b49571bbb9737175444cea6eb72328"}, + {file = "coverage-7.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b2eccb883368f9e972e216c7b4c7c06cabda925b5f06dde0650281cb7666a30"}, + {file = "coverage-7.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c00cdc8fa4e50e1cc1f941a7f2e3e0f26cb2a1233c9696f26963ff58445bac7"}, + {file = "coverage-7.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9a4a8dd3dcf4cbd3165737358e4d7dfbd9d59902ad11e3b15eebb6393b0446e"}, + {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:062b0a75d9261e2f9c6d071753f7eef0fc9caf3a2c82d36d76667ba7b6470003"}, + {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ebe7c9e67a2d15fa97b77ea6571ce5e1e1f6b0db71d1d5e96f8d2bf134303c1d"}, + {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c0a120238dd71c68484f02562f6d446d736adcc6ca0993712289b102705a9a3a"}, + {file = "coverage-7.4.3-cp312-cp312-win32.whl", hash = "sha256:37389611ba54fd6d278fde86eb2c013c8e50232e38f5c68235d09d0a3f8aa352"}, + {file = "coverage-7.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:d25b937a5d9ffa857d41be042b4238dd61db888533b53bc76dc082cb5a15e914"}, + {file = "coverage-7.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:28ca2098939eabab044ad68850aac8f8db6bf0b29bc7f2887d05889b17346454"}, + {file = "coverage-7.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:280459f0a03cecbe8800786cdc23067a8fc64c0bd51dc614008d9c36e1659d7e"}, + {file = "coverage-7.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c0cdedd3500e0511eac1517bf560149764b7d8e65cb800d8bf1c63ebf39edd2"}, + {file = "coverage-7.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:9a9babb9466fe1da12417a4aed923e90124a534736de6201794a3aea9d98484e"}, + {file = "coverage-7.4.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dec9de46a33cf2dd87a5254af095a409ea3bf952d85ad339751e7de6d962cde6"}, + {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:16bae383a9cc5abab9bb05c10a3e5a52e0a788325dc9ba8499e821885928968c"}, + {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2c854ce44e1ee31bda4e318af1dbcfc929026d12c5ed030095ad98197eeeaed0"}, + {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ce8c50520f57ec57aa21a63ea4f325c7b657386b3f02ccaedeccf9ebe27686e1"}, + {file = "coverage-7.4.3-cp38-cp38-win32.whl", hash = "sha256:708a3369dcf055c00ddeeaa2b20f0dd1ce664eeabde6623e516c5228b753654f"}, + {file = "coverage-7.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:1bf25fbca0c8d121a3e92a2a0555c7e5bc981aee5c3fdaf4bb7809f410f696b9"}, + {file = "coverage-7.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b253094dbe1b431d3a4ac2f053b6d7ede2664ac559705a704f621742e034f1f"}, + {file = "coverage-7.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77fbfc5720cceac9c200054b9fab50cb2a7d79660609200ab83f5db96162d20c"}, + {file = "coverage-7.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6679060424faa9c11808598504c3ab472de4531c571ab2befa32f4971835788e"}, + {file = "coverage-7.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4af154d617c875b52651dd8dd17a31270c495082f3d55f6128e7629658d63765"}, + {file = "coverage-7.4.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8640f1fde5e1b8e3439fe482cdc2b0bb6c329f4bb161927c28d2e8879c6029ee"}, + {file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:69b9f6f66c0af29642e73a520b6fed25ff9fd69a25975ebe6acb297234eda501"}, + 
{file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0842571634f39016a6c03e9d4aba502be652a6e4455fadb73cd3a3a49173e38f"}, + {file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a78ed23b08e8ab524551f52953a8a05d61c3a760781762aac49f8de6eede8c45"}, + {file = "coverage-7.4.3-cp39-cp39-win32.whl", hash = "sha256:c0524de3ff096e15fcbfe8f056fdb4ea0bf497d584454f344d59fce069d3e6e9"}, + {file = "coverage-7.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:0209a6369ccce576b43bb227dc8322d8ef9e323d089c6f3f26a597b09cb4d2aa"}, + {file = "coverage-7.4.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:7cbde573904625509a3f37b6fecea974e363460b556a627c60dc2f47e2fffa51"}, + {file = "coverage-7.4.3.tar.gz", hash = "sha256:276f6077a5c61447a48d133ed13e759c09e62aff0dc84274a68dc18660104d52"}, +] + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "docker" +version = "7.0.0" +description = "A Python library for the Docker Engine API." +optional = false +python-versions = ">=3.8" +files = [ + {file = "docker-7.0.0-py3-none-any.whl", hash = "sha256:12ba681f2777a0ad28ffbcc846a69c31b4dfd9752b47eb425a274ee269c5e14b"}, + {file = "docker-7.0.0.tar.gz", hash = "sha256:323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3"}, +] + +[package.dependencies] +packaging = ">=14.0" +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" + +[package.extras] +ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" 
+description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "kafka-python" +version = "2.0.3.dev0" +description = "Pure Python client for Apache Kafka" +optional = false +python-versions = "*" +files = [] +develop = false + +[package.extras] +crc32c = ["crc32c"] +lz4 = ["lz4"] +snappy = ["python-snappy"] +zstd = ["zstandard"] + +[package.source] +type = "git" +url = "https://github.com/dpkp/kafka-python.git" +reference = "HEAD" +resolved_reference = "a6d0579d3cadd3826dd364b01bc12a2173139abc" + +[[package]] +name = "packaging" +version = "24.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, +] + +[[package]] +name = "pluggy" +version = "1.4.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = 
"sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + 
{file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file 
= "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = 
"psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + +[[package]] +name = "py4j" +version = "0.10.9.7" +description = "Enables Python programs to dynamically access arbitrary Java objects" +optional = false +python-versions = "*" +files = [ + {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, + {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, +] + +[[package]] +name = "pycryptodome" +version = "3.20.0" +description = "Cryptographic library for Python" +optional = false +python-versions = 
">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pycryptodome-3.20.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:f0e6d631bae3f231d3634f91ae4da7a960f7ff87f2865b2d2b831af1dfb04e9a"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:baee115a9ba6c5d2709a1e88ffe62b73ecc044852a925dcb67713a288c4ec70f"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:417a276aaa9cb3be91f9014e9d18d10e840a7a9b9a9be64a42f553c5b50b4d1d"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a1250b7ea809f752b68e3e6f3fd946b5939a52eaeea18c73bdab53e9ba3c2dd"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:d5954acfe9e00bc83ed9f5cb082ed22c592fbbef86dc48b907238be64ead5c33"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win32.whl", hash = "sha256:06d6de87c19f967f03b4cf9b34e538ef46e99a337e9a61a77dbe44b2cbcf0690"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ec0bb1188c1d13426039af8ffcb4dbe3aad1d7680c35a62d8eaf2a529b5d3d4f"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5601c934c498cd267640b57569e73793cb9a83506f7c73a8ec57a516f5b0b091"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d29daa681517f4bc318cd8a23af87e1f2a7bad2fe361e8aa29c77d652a065de4"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3427d9e5310af6680678f4cce149f54e0bb4af60101c7f2c16fdf878b39ccccc"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:3cd3ef3aee1079ae44afaeee13393cf68b1058f70576b11439483e34f93cf818"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac1c7c0624a862f2e53438a15c9259d1655325fc2ec4392e66dc46cdae24d044"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:76658f0d942051d12a9bd08ca1b6b34fd762a8ee4240984f7c06ddfb55eaf15a"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f35d6cee81fa145333137009d9c8ba90951d7d77b67c79cbe5f03c7eb74d8fe2"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cb39afede7055127e35a444c1c041d2e8d2f1f9c121ecef573757ba4cd2c3c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a4c4dc60b78ec41d2afa392491d788c2e06edf48580fbfb0dd0f828af49d25"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fb3b87461fa35afa19c971b0a2b7456a7b1db7b4eba9a8424666104925b78128"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:acc2614e2e5346a4a4eab6e199203034924313626f9620b7b4b38e9ad74b7e0c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:210ba1b647837bfc42dd5a813cdecb5b86193ae11a3f5d972b9a0ae2c7e9e4b4"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win32.whl", hash = "sha256:8d6b98d0d83d21fb757a182d52940d028564efe8147baa9ce0f38d057104ae72"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:9b3ae153c89a480a0ec402e23db8d8d84a3833b65fa4b15b81b83be9d637aab9"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:4401564ebf37dfde45d096974c7a159b52eeabd9969135f0426907db367a652a"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:ec1f93feb3bb93380ab0ebf8b859e8e5678c0f010d2d78367cf6bc30bfeb148e"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:acae12b9ede49f38eb0ef76fdec2df2e94aad85ae46ec85be3648a57f0a7db04"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47888542a0633baff535a04726948e876bf1ed880fddb7c10a736fa99146ab3"}, + {file = 
"pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e0e4a987d38cfc2e71b4a1b591bae4891eeabe5fa0f56154f576e26287bfdea"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c18b381553638414b38705f07d1ef0a7cf301bc78a5f9bc17a957eb19446834b"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a60fedd2b37b4cb11ccb5d0399efe26db9e0dd149016c1cc6c8161974ceac2d6"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:405002eafad114a2f9a930f5db65feef7b53c4784495dd8758069b89baf68eab"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ab6ab0cb755154ad14e507d1df72de9897e99fd2d4922851a276ccc14f4f1a5"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acf6e43fa75aca2d33e93409f2dafe386fe051818ee79ee8a3e21de9caa2ac9e"}, + {file = "pycryptodome-3.20.0.tar.gz", hash = "sha256:09609209ed7de61c2b560cc5c8c4fbf892f8b15b1faf7e4cbffac97db1fffda7"}, +] + +[[package]] +name = "pyspark" +version = "3.5.1" +description = "Apache Spark Python API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyspark-3.5.1.tar.gz", hash = "sha256:dd6569e547365eadc4f887bf57f153e4d582a68c4b490de475d55b9981664910"}, +] + +[package.dependencies] +py4j = "0.10.9.7" + +[package.extras] +connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +ml = ["numpy (>=1.15)"] +mllib = ["numpy (>=1.15)"] +pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] + +[[package]] +name = "pytest" +version = "8.1.1" +description = "pytest: simple powerful testing with Python" +optional = false 
+python-versions = ">=3.8" +files = [ + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.4,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = 
"sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = 
"PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = 
"PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "testcontainers" +version = "4.4.0" +description = "Python library for throwaway instances of anything that can run in a Docker container" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "testcontainers-4.4.0-py3-none-any.whl", hash = "sha256:455e0e28bbf9dcf2d66a342e3103b0d8889db3295a490c009a98848e08791837"}, + {file = "testcontainers-4.4.0.tar.gz", hash = "sha256:f4e87c3831991f9b4d5b6544d0f438fbfd140a74c834b13efb15a2f7c89833c4"}, +] + +[package.dependencies] +docker = "*" +typing-extensions = "*" +urllib3 = "*" +wrapt = "*" + +[package.extras] +arangodb = ["python-arango (>=7.8,<8.0)"] +azurite = ["azure-storage-blob (>=12.19,<13.0)"] +chroma = ["chromadb-client"] +clickhouse = ["clickhouse-driver"] +google = ["google-cloud-datastore (>=2)", "google-cloud-pubsub (>=2)"] +influxdb = ["influxdb", "influxdb-client"] +k3s = ["kubernetes", "pyyaml"] 
+keycloak = ["python-keycloak"] +localstack = ["boto3"] +minio = ["minio"] +mongodb = ["pymongo"] +mssql = ["pymssql", "sqlalchemy"] +mysql = ["pymysql[rsa]", "sqlalchemy"] +nats = ["nats-py"] +neo4j = ["neo4j"] +opensearch = ["opensearch-py"] +oracle = ["oracledb", "sqlalchemy"] +oracle-free = ["oracledb", "sqlalchemy"] +qdrant = ["qdrant-client"] +rabbitmq = ["pika"] +redis = ["redis"] +registry = ["bcrypt"] +selenium = ["selenium"] +weaviate = ["weaviate-client (>=4.5.4,<5.0.0)"] + +[[package]] +name = "typing-extensions" +version = "4.11.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = 
"sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.12" +content-hash = "02f4246bf3bbb2546b674ba8698c3c49aff901734180e1ee0b4e038e089276d4" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..54b06f2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,27 @@ +[tool.poetry] +name = "obsrv" +version = "0.1.0" +description = "" +authors = ["Ravi Mula "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +pycryptodome = "^3.20.0" +psycopg2-binary = "^2.9.9" +pyyaml = "^6.0.1" +kafka-python = {git = "https://github.com/dpkp/kafka-python.git"} + +[tool.poetry.group.dev.dependencies] +pytest = "^8.1.1" +coverage = "^7.4.3" +pyspark = "^3.5.1" +testcontainers = {extras = ["kafka", "postgres"], version = "^4.4.0"} +kafka-python = "^2.0.2" + +[tool.poetry.group.batch.dependencies] +pyspark = "^3.5.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/__init__.py 
# Patch metadata that shared these lines in the flattened diff (kept verbatim):
#   b/tests/__init__.py new file mode 100644 index 0000000..e69de29
#   diff --git a/tests/create_tables.py b/tests/create_tables.py
#   new file mode 100644 index 0000000..4c7e562
#   --- /dev/null +++ b/tests/create_tables.py @@ -0,0 +1,121 @@
import os

import psycopg2
import yaml

from obsrv.utils import EncryptionUtil


def create_tables(config):
    """Create the test schema in Postgres and seed it with one sample row per table.

    Creates the ``datasets``, ``connector_registry`` and ``connector_instances``
    tables (plus three indexes) if they do not exist, then inserts one dataset,
    one connector registry entry and one connector instance, and commits.

    Args:
        config: Mapping that must contain ``'obsrv-encryption-key'``; it is used
            to build the ``EncryptionUtil``.

    Raises:
        KeyError: If ``'obsrv-encryption-key'`` or a ``postgres`` setting is missing.
        psycopg2.Error: If connecting or executing any statement fails; nothing
            is committed in that case and the connection is still closed.

    Note:
        The Postgres connection settings are NOT taken from ``config``; they are
        read from ``config.yaml`` located next to this module.
    """
    enc = EncryptionUtil(config['obsrv-encryption-key'])

    datasets = """
        CREATE TABLE IF NOT EXISTS datasets (
            id TEXT PRIMARY KEY,
            dataset_id TEXT,
            type TEXT NOT NULL,
            name TEXT,
            validation_config JSON,
            extraction_config JSON,
            dedup_config JSON,
            data_schema JSON,
            denorm_config JSON,
            router_config JSON,
            dataset_config JSON,
            status TEXT,
            tags TEXT[],
            data_version INT,
            created_by TEXT,
            updated_by TEXT,
            created_date TIMESTAMP NOT NULL DEFAULT now(),
            updated_date TIMESTAMP NOT NULL DEFAULT now(),
            published_date TIMESTAMP NOT NULL DEFAULT now()
        );"""

    connector_registry = """
        CREATE TABLE IF NOT EXISTS connector_registry (
            id TEXT PRIMARY KEY,
            version TEXT NOT NULL,
            type TEXT NOT NULL,
            category TEXT NOT NULL,
            name TEXT NOT NULL,
            description TEXT,
            technology TEXT NOT NULL,
            licence TEXT NOT NULL,
            owner TEXT NOT NULL,
            iconURL TEXT,
            status TEXT NOT NULL,
            created_by text NOT NULL,
            updated_by text NOT NULL,
            created_date TIMESTAMP NOT NULL DEFAULT now(),
            updated_date TIMESTAMP NOT NULL,
            live_date TIMESTAMP NOT NULL DEFAULT now()
        );"""

    connector_instances = """
        CREATE TABLE IF NOT EXISTS connector_instances (
            id TEXT PRIMARY KEY,
            dataset_id TEXT NOT NULL REFERENCES datasets (id),
            connector_id TEXT NOT NULL REFERENCES connector_registry (id),
            connector_type TEXT NOT NULL,
            connector_config json NOT NULL,
            operations_config json NOT NULL,
            status TEXT NOT NULL,
            connector_state JSON,
            connector_stats JSON,
            created_by text NOT NULL,
            updated_by text NOT NULL,
            created_date TIMESTAMP NOT NULL DEFAULT now(),
            updated_date TIMESTAMP NOT NULL,
            published_date TIMESTAMP NOT NULL DEFAULT now()
        );"""

    indexes = """
        CREATE INDEX IF NOT EXISTS connector_registry_category ON connector_registry(category);
        CREATE INDEX IF NOT EXISTS connector_registry_type ON connector_registry(type);
        CREATE INDEX IF NOT EXISTS connector_instances_connector_id ON connector_instances(connector_id);
    """

    ins_ds = """
        INSERT INTO datasets (id, dataset_id, type, name, validation_config, extraction_config, dedup_config, data_schema, denorm_config, router_config, dataset_config, tags, data_version, status, created_by, updated_by, created_date, updated_date, published_date) VALUES
        ('new-york-taxi-data', 'new-york-taxi-data', 'dataset', 'new-york-taxi-data', '{"validate": true, "mode": "Strict", "validation_mode": "Strict"}', '{"is_batch_event": false}', '{"drop_duplicates": true, "dedup_key": "tripID", "dedup_period": 604800}', '{"$schema":"https://json-schema.org/draft/2020-12/schema","type":"object","properties":{"tripID":{"type":"string","suggestions":[{"message":"The Property tripID appears to be uuid format type.","advice":"Suggest to not to index the high cardinal columns","resolutionType":"DEDUP","severity":"LOW","path":"properties.tripID"}],"arrival_format":"text","data_type":"string"}},"additionalProperties":false}', '{}', '{"topic": "new-york-taxi-data"}', '{"data_key": "", "timestamp_key": "tpep_pickup_datetime", "exclude_fields": [], "entry_topic": "test.ingest", "redis_db_host": "obsrv-dedup-redis-master.redis.svc.cluster.local", "redis_db_port": 6379, "index_data": true, "redis_db": 0}', '{}', '1', 'Live', 'SYSTEM', 'SYSTEM', '2024-03-27 06:48:35.993478', '2024-03-27 06:48:35.993478', '2024-03-27 06:48:35.993478');
    """

    ins_cr = """
        INSERT INTO connector_registry (id, version, type, category, name, description, technology, licence, owner, iconURL, status, created_by, updated_by, updated_date) VALUES
        ('test.1', '1', 'source', 'object', 'test_reader', 'test_reader', 'Python', 'Apache 2.0', 'ravi@obsrv.ai', 'http://localhost', 'Live', 'SYSTEM', 'SYSTEM', now());
    """

    connector_config = {"type":"local"}
    # NOTE(review): the encrypted value is computed but the INSERT below still
    # stores the plaintext '{"type":"local"}'. Presumably the encrypted form was
    # meant to be inserted -- confirm against whatever reads connector_config
    # before switching. The call is kept so existing behaviour is unchanged.
    enc_config = enc.encrypt(connector_config)  # noqa: F841

    ins_ci = """
        INSERT INTO connector_instances (id, dataset_id, connector_id, connector_type, connector_config, operations_config, status, connector_state, connector_stats, created_by, updated_by, created_date, updated_date, published_date) VALUES
        ('test.new-york-taxi-data.1', 'new-york-taxi-data', 'test.1', 'source', '{"type":"local"}', '{}', 'Live', '{}', '{}', 'SYSTEM', 'SYSTEM', now(), now(), now()
        );
    """

    # Connection settings come from the YAML file beside this module. A separate
    # name is used so the ``config`` argument is no longer silently shadowed.
    config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
    with open(config_path, 'r', encoding='utf-8') as config_file:
        pg_config = yaml.safe_load(config_file)['postgres']

    conn = psycopg2.connect(
        host=pg_config['host'],
        port=pg_config['port'],
        user=pg_config['user'],
        password=pg_config['password'],
        dbname=pg_config['dbname'],
    )
    try:
        # DDL first, then seed rows; order matters because connector_instances
        # references both datasets and connector_registry.
        statements = (
            datasets,
            connector_registry,
            connector_instances,
            indexes,
            ins_ds,
            ins_cr,
            ins_ci,
        )
        with conn.cursor() as cur:
            for statement in statements:
                cur.execute(statement)
        conn.commit()
    finally:
        # Always release the connection, even when a statement fails; closing
        # without commit discards the partial transaction.
        conn.close()

# Patch metadata that followed this module in the flattened diff (kept verbatim):
#   \ No newline at end of file
#   diff --git a/tests/sample_data/nyt_data_100.json b/tests/sample_data/nyt_data_100.json
#   new file mode 100644 index 0000000..2c88bf3
#   --- /dev/null +++ b/tests/sample_data/nyt_data_100.json @@ -0,0 +1,100 @@
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"7","PULocationID":"261","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"32.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"10.14","tolls_amount":"0","total_amount":"43.94"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Teagan.Frami@hotmail.com","mobile":"343-570-1197 x16129"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-22 03:16:20","tpep_pickup_datetime":"2023-09-18 02:50:01","tripID":"25be90b3-7cbb-49d4-9785-b6fe995b0025","trip_distance":"11.13"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794206,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":654}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"263","PULocationID":"229","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3","tolls_amount":"0","total_amount":"13.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Lorena.Daugherty@gmail.com","mobile":"559.624.3003 x61721"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-21 02:33:16","tpep_pickup_datetime":"2023-12-11 02:23:34","tripID":"86dba649-4810-47dc-856a-64dc6f583a09","trip_distance":"1.94"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794209,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":657}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"243","PULocationID":"162","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"27","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3","tolls_amount":"0","total_amount":"31.3"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Marques_Carter68@gmail.com","mobile":"640.971.7796 x1582"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-02 03:12:12","tpep_pickup_datetime":"2023-06-23 02:47:05","tripID":"a1e9eae9-63d9-4a67-ba4d-1d3a0e69a909","trip_distance":"8.38"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794213,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":661}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"87","PULocationID":"229","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"20.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"4.35","tolls_amount":"0","total_amount":"26.15"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Rosina93@hotmail.com","mobile":"1-587-969-1603"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-14 02:53:19","tpep_pickup_datetime":"2023-03-11 02:30:46","tripID":"26c2008a-c9c3-412e-ada4-28546d661647","trip_distance":"5.70"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794216,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":664}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"181","PULocationID":"142","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"49.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"5.76","total_amount":"56.56"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Keon_Gorczany@hotmail.com","mobile":"(268) 788-3566 x92699"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-25 02:56:52","tpep_pickup_datetime":"2023-12-19 02:12:23","tripID":"d2176c39-080d-484a-ba20-a9f01188d8d3","trip_distance":"16.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794220,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":668}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"236","PULocationID":"170","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.15","tolls_amount":"0","total_amount":"12.95"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Adele_Langworth-Weimann82@hotmail.com","mobile":"1-941-205-3594 x0006"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-29 02:31:45","tpep_pickup_datetime":"2023-03-13 02:23:13","tripID":"81cd141b-1e9c-438d-b5ee-8c5259177063","trip_distance":"2.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794223,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":671}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"142","PULocationID":"236","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.7","tolls_amount":"0","total_amount":"13.5"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Roosevelt_Spinka2@gmail.com","mobile":"996.831.1399 x956"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-28 02:44:07","tpep_pickup_datetime":"2023-06-29 02:33:46","tripID":"b439d3ef-94cc-4018-951d-cd5a00c1f87c","trip_distance":"2.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794227,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":675}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"4","PULocationID":"68","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"12.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"13.8"},"passenger_count":"3","payment_type":"2","primary_passenger":{"email":"Raphael_Borer35@hotmail.com","mobile":"609-381-7064"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-02 03:14:08","tpep_pickup_datetime":"2023-04-16 02:58:03","tripID":"992381b3-e012-43a2-be16-e5e0146e3681","trip_distance":"2.80"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794230,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":678}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"232","PULocationID":"68","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"17.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"18.8"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Nayeli.Kub-Weber41@hotmail.com","mobile":"414.556.6205 x636"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-10 02:41:44","tpep_pickup_datetime":"2023-04-19 02:19:47","tripID":"83157245-9369-415c-89fa-e2d9ae674821","trip_distance":"4.53"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794233,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":681}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"229","PULocationID":"232","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"13.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"14.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Nadia.Dicki6@gmail.com","mobile":"(622) 541-8901 x824"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-07 02:59:33","tpep_pickup_datetime":"2023-06-14 02:47:05","tripID":"60301101-b6f9-4327-96dc-daeb27937a04","trip_distance":"3.56"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794237,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":685}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"229","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.36","tolls_amount":"0","total_amount":"14.16"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Harold.Greenfelder52@hotmail.com","mobile":"498.394.3088 x841"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-16 02:11:24","tpep_pickup_datetime":"2023-06-21 02:00:55","tripID":"e2476cbc-c609-499b-bdf9-90d42bc60a4a","trip_distance":"2.83"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794244,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":692}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"33","PULocationID":"229","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"20","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"4.26","tolls_amount":"0","total_amount":"25.56"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Edwina_Jacobs56@hotmail.com","mobile":"207-669-3585"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-28 02:31:55","tpep_pickup_datetime":"2024-01-09 02:14:01","tripID":"3bdb160a-23f3-4827-8619-b6443594e71a","trip_distance":"6.28"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794287,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":735}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"158","PULocationID":"114","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.56","tolls_amount":"0","total_amount":"9.36"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Jazmin68@yahoo.com","mobile":"(998) 720-5079 x4750"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-04 02:55:20","tpep_pickup_datetime":"2023-02-10 02:48:24","tripID":"3068ea37-fc73-445f-801d-b6a5434b29bb","trip_distance":"1.14"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794290,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":738}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"45","PULocationID":"158","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"10.8"},"passenger_count":"3","payment_type":"2","primary_passenger":{"email":"Lexie42@yahoo.com","mobile":"477.656.8319 x51739"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-06 03:08:37","tpep_pickup_datetime":"2023-01-16 02:57:04","tripID":"28b0589f-9769-4a83-9cc7-78eee8eb3209","trip_distance":"1.97"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794294,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":742}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"43","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"20.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"4.36","tolls_amount":"0","total_amount":"26.16"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Donald_Schowalter31@gmail.com","mobile":"221-276-6460 x6513"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-04 02:20:24","tpep_pickup_datetime":"2023-08-28 01:57:19","tripID":"a9fed048-4b77-47b0-a025-1085af772b64","trip_distance":"5.65"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794298,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":746}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"75","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"19","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2","tolls_amount":"0","total_amount":"22.3"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Nola.Metz@yahoo.com","mobile":"332.759.9239"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-15 02:44:03","tpep_pickup_datetime":"2023-02-19 02:21:58","tripID":"e761c2c6-c757-47d8-96eb-cbc4e1cd7c90","trip_distance":"5.35"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794301,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":749}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"170","PULocationID":"79","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.15","tolls_amount":"0","total_amount":"12.95"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Ibrahim_Daugherty@gmail.com","mobile":"800.910.7891"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-02 02:26:09","tpep_pickup_datetime":"2023-01-06 02:15:28","tripID":"e91e19cf-9d02-498f-b2c0-708d10106ad5","trip_distance":"2.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794304,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":752}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"162","PULocationID":"234","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"9.3"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Jacynthe.Bashirian79@gmail.com","mobile":"1-964-329-9021"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-29 02:43:11","tpep_pickup_datetime":"2024-02-06 02:34:21","tripID":"83ce3138-98dc-4c64-b28c-269a029886c8","trip_distance":"1.60"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794307,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":755}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"50","PULocationID":"50","RatecodeID":"5","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0","fare_amount":"0","improvement_surcharge":"0.3","mta_tax":"0","tip_amount":"0","tolls_amount":"0","total_amount":"0.3"},"passenger_count":"1","payment_type":"3","primary_passenger":{"email":"Rogelio34@hotmail.com","mobile":"898.781.9202 x14465"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-30 02:15:42","tpep_pickup_datetime":"2023-01-10 02:15:41","tripID":"c4d3ad69-2bf7-4e72-82ff-90ce63841704","trip_distance":".00"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794310,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":758}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"162","PULocationID":"48","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"9.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Elyse_Prohaska@yahoo.com","mobile":"(357) 962-5072 x6405"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-18 02:32:47","tpep_pickup_datetime":"2023-12-08 02:22:14","tripID":"169f7e3a-6c53-4ab7-bda9-5911b784f30c","trip_distance":"1.30"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794313,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":761}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"170","PULocationID":"161","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"3.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"4.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Ima1@gmail.com","mobile":"1-773-959-5605 x73199"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-07 02:40:03","tpep_pickup_datetime":"2023-07-18 02:38:18","tripID":"2743672b-c4d1-4ce8-adc1-f90b78d3a44b","trip_distance":".30"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794317,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":765}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"181","PULocationID":"232","RatecodeID":"3","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"36.5","improvement_surcharge":"0.3","mta_tax":"0","tip_amount":"7.45","tolls_amount":"0","total_amount":"44.75"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Lynn.McClure66@yahoo.com","mobile":"351-615-5695 x4474"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-04 03:14:04","tpep_pickup_datetime":"2023-07-06 02:52:04","tripID":"64dc9673-c0af-4dc6-b18e-b8a7f499dee9","trip_distance":"5.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794320,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":768}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"52","PULocationID":"33","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"3.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"4.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Madge_Bergstrom68@yahoo.com","mobile":"1-680-654-8869"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-08 02:14:26","tpep_pickup_datetime":"2023-07-14 02:13:08","tripID":"bbfc9834-4e11-4e68-bd28-ec927655f78b","trip_distance":".38"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794323,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":771}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"40","PULocationID":"52","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"5.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"6.8"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Della21@hotmail.com","mobile":"755.562.8568 x399"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-22 02:21:30","tpep_pickup_datetime":"2023-07-09 02:16:47","tripID":"8cd91fbc-927b-4e56-8172-a0350db5ad63","trip_distance":"1.15"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794327,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":775}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"255","PULocationID":"65","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.56","tolls_amount":"0","total_amount":"15.36"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Bernadine.Friesen@yahoo.com","mobile":"555-470-0062"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-27 02:40:30","tpep_pickup_datetime":"2023-02-22 02:31:12","tripID":"1f8e276b-346d-4e00-b5d5-fcd688efbe56","trip_distance":"3.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794330,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":778}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"256","PULocationID":"255","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.16","tolls_amount":"0","total_amount":"6.96"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Linda96@gmail.com","mobile":"1-642-292-8585"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-10 02:45:24","tpep_pickup_datetime":"2023-03-28 02:41:58","tripID":"3e76db01-516a-4672-8a7f-d3ddfb7d285d","trip_distance":".81"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794333,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":781}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"255","PULocationID":"256","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"5.3"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Rosetta_Beer@hotmail.com","mobile":"798.313.5258 x6339"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-04-04 02:50:26","tpep_pickup_datetime":"2023-06-01 02:47:31","tripID":"6b41ee87-6fa1-4f83-9447-2e1e98667d5c","trip_distance":".62"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794336,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":784}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"90","PULocationID":"255","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"17","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.66","tolls_amount":"0","total_amount":"21.96"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Carmen57@gmail.com","mobile":"978.415.9386 x8923"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-04-08 03:09:43","tpep_pickup_datetime":"2023-06-26 02:50:53","tripID":"d501b0eb-c941-4328-9d0b-83001f652918","trip_distance":"4.64"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794340,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":788}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"162","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.26","tolls_amount":"0","total_amount":"13.56"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Oliver.Labadie10@yahoo.com","mobile":"(366) 454-2917 x477"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-21 02:29:12","tpep_pickup_datetime":"2023-05-11 02:18:46","tripID":"e08cf524-22a7-4e75-86aa-47497b76f378","trip_distance":"2.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794343,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":791}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"239","PULocationID":"162","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"9.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Dora26@yahoo.com","mobile":"(478) 966-5035 x192"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-10-08 02:41:45","tpep_pickup_datetime":"2023-08-06 02:33:12","tripID":"e2621721-e337-43b7-9c2f-eb2769bffd0f","trip_distance":"1.85"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794385,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":833}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"162","PULocationID":"239","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.7","tolls_amount":"0","total_amount":"13.5"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Isabella_Wunsch89@gmail.com","mobile":"521.252.8903 x15758"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-06 02:53:08","tpep_pickup_datetime":"2023-08-28 02:43:48","tripID":"5f8ead46-df6d-427c-8f4e-8c51485f3bc4","trip_distance":"2.41"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794389,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":837}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"237","PULocationID":"162","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.26","tolls_amount":"0","total_amount":"7.56"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Marcelina_Cormier21@hotmail.com","mobile":"557-359-4984 x5088"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-05 03:02:05","tpep_pickup_datetime":"2024-01-11 02:57:55","tripID":"d0e83758-cf79-44e9-a138-2c8c84e7e1ab","trip_distance":".89"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794393,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":841}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"229","PULocationID":"141","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"5.3"},"passenger_count":"5","payment_type":"2","primary_passenger":{"email":"Evie.Pouros@yahoo.com","mobile":"277-513-8399 x256"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-27 02:09:02","tpep_pickup_datetime":"2023-04-09 02:06:09","tripID":"93968446-dafa-476f-9d7e-570304c3dfd8","trip_distance":".68"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794396,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":844}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"224","PULocationID":"229","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"11.8"},"passenger_count":"5","payment_type":"2","primary_passenger":{"email":"Christ.Kuvalis@gmail.com","mobile":"(441) 378-5258 x6569"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-17 02:24:27","tpep_pickup_datetime":"2023-11-29 02:10:40","tripID":"50587962-2be2-4719-a09b-b8eb35b7be10","trip_distance":"2.33"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794399,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":847}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"116","PULocationID":"107","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"34.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"7.16","tolls_amount":"0","total_amount":"42.96"},"passenger_count":"5","payment_type":"1","primary_passenger":{"email":"Alexandre_Walker@yahoo.com","mobile":"458-362-0622 x630"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-26 03:12:19","tpep_pickup_datetime":"2024-01-14 02:27:14","tripID":"da36b5dd-0704-400c-adb5-888be2f6ebf8","trip_distance":"9.17"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794402,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":850}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"262","PULocationID":"237","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"10.3"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Rosalinda79@hotmail.com","mobile":"(285) 935-0429 x620"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-04-04 02:22:29","tpep_pickup_datetime":"2024-01-11 02:13:41","tripID":"02412500-2ffe-45e7-bee5-a6666dc1d839","trip_distance":"2.14"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794406,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":854}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"90","PULocationID":"263","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"16","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.46","tolls_amount":"0","total_amount":"20.76"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Gerda55@hotmail.com","mobile":"551.761.2078 x469"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-24 02:48:40","tpep_pickup_datetime":"2023-08-27 02:29:15","tripID":"b21960c7-f5a0-4bca-9402-abf7bd246004","trip_distance":"3.93"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794409,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":857}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"90","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.36","tolls_amount":"0","total_amount":"14.16"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Corene27@gmail.com","mobile":"1-390-967-3132 x173"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-14 03:03:44","tpep_pickup_datetime":"2023-12-21 02:50:12","tripID":"d8253210-ce80-48bf-ad78-f9e9b02a7659","trip_distance":"2.01"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794412,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":860}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"234","PULocationID":"137","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"7.8"},"passenger_count":"3","payment_type":"2","primary_passenger":{"email":"Alicia9@hotmail.com","mobile":"1-967-665-8696 x087"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-24 02:10:42","tpep_pickup_datetime":"2023-04-26 02:02:57","tripID":"cb63a45c-d619-4526-88fb-7f020417342f","trip_distance":"1.07"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794416,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":864}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"234","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.16","tolls_amount":"0","total_amount":"12.96"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Rickey.Heller29@hotmail.com","mobile":"(315) 523-5370 x8087"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-11 02:25:05","tpep_pickup_datetime":"2023-09-30 02:12:49","tripID":"8840c605-b20b-41fd-a06e-c724e1a0a64b","trip_distance":"1.81"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794420,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":868}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"49","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"16","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"17.3"},"passenger_count":"4","payment_type":"2","primary_passenger":{"email":"Aurore4@hotmail.com","mobile":"284-324-0491 x19914"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-18 02:25:05","tpep_pickup_datetime":"2023-02-09 02:27:35","tripID":"e67ffdc5-4302-48a8-8f44-cee84c2fe73b","trip_distance":"3.84"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794423,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":871}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"234","PULocationID":"236","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"14.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"15.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Jerrod_Osinski@hotmail.com","mobile":"1-689-477-9312 x99969"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-30 02:37:11","tpep_pickup_datetime":"2023-01-27 02:20:17","tripID":"95ef5fab-051a-4e15-9a4a-9cf49ca65231","trip_distance":"3.78"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794426,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":874}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"232","PULocationID":"234","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"12.3"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Brody.Strosin@yahoo.com","mobile":"300-552-9715 x358"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-06 02:51:00","tpep_pickup_datetime":"2023-05-19 02:38:39","tripID":"7fa48c82-4c15-4204-840b-a633b344cced","trip_distance":"2.39"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794430,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":878}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"87","PULocationID":"232","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"12","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.66","tolls_amount":"0","total_amount":"15.96"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Karlee_Bosco@hotmail.com","mobile":"891-778-9086 x5622"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-09 03:05:21","tpep_pickup_datetime":"2023-11-24 02:54:32","tripID":"3600dd4d-d3d4-48d4-8220-f02200161d3a","trip_distance":"3.00"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794433,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":881}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"236","PULocationID":"48","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.46","tolls_amount":"0","total_amount":"14.76"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Madelyn.Ullrich@yahoo.com","mobile":"725.333.9195"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-09 02:23:56","tpep_pickup_datetime":"2023-05-27 02:14:49","tripID":"c1049b7b-cf57-486b-a154-cdf1309b6841","trip_distance":"3.42"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794437,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":885}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"150","PULocationID":"162","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"55.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"14.2","tolls_amount":"0","total_amount":"71"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Waylon14@hotmail.com","mobile":"564-322-3411 x656"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-22 03:11:46","tpep_pickup_datetime":"2023-12-07 02:33:40","tripID":"9d42a2fd-9910-4c79-86d4-262e3b7f5fae","trip_distance":"19.97"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794440,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":888}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"147","PULocationID":"41","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"19","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2","tolls_amount":"0","total_amount":"22.3"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Manley_Hackett71@gmail.com","mobile":"1-871-508-0977 x05891"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-09 02:22:37","tpep_pickup_datetime":"2023-06-04 02:00:35","tripID":"b8946252-73ef-4e89-9af2-22ade786e381","trip_distance":"4.96"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794484,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":932}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"244","PULocationID":"75","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"16.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"17.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Marcus67@hotmail.com","mobile":"546-381-5618 x705"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-19 03:00:20","tpep_pickup_datetime":"2023-05-01 02:49:25","tripID":"ac08ee2e-a254-4a8a-85c3-1003e93b8df3","trip_distance":"5.42"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794488,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":936}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"170","PULocationID":"65","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"19.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1","tolls_amount":"0","total_amount":"23.75"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Rebeca78@gmail.com","mobile":"857.384.4490 x9274"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-21 02:37:59","tpep_pickup_datetime":"2023-01-10 02:15:26","tripID":"a14db847-b6b2-4377-a8f8-be62cbc6c58b","trip_distance":"5.08"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794491,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":939}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"170","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"7","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.66","tolls_amount":"0","total_amount":"9.96"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Claudie_Hegmann@hotmail.com","mobile":"1-273-731-1442 x2409"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-04-17 02:46:37","tpep_pickup_datetime":"2023-09-20 02:39:34","tripID":"6e4e34f8-2747-4298-9d46-3e52e1ad4a49","trip_distance":"1.41"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794495,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":943}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"238","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"16","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.46","tolls_amount":"0","total_amount":"20.76"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Antonetta_Davis@hotmail.com","mobile":"371-641-0671 x207"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-12 03:05:15","tpep_pickup_datetime":"2023-04-30 02:49:45","tripID":"68b10553-f67d-4acd-ae8a-bd11b02f805d","trip_distance":"5.05"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794499,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":947}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"48","PULocationID":"48","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.45","tolls_amount":"0","total_amount":"8.75"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Clovis.Grady7@yahoo.com","mobile":"344-298-3292"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-16 02:08:57","tpep_pickup_datetime":"2023-02-12 02:02:52","tripID":"6b6ab88d-6ae4-407a-9d96-cafc40925f26","trip_distance":".90"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794502,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":950}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"90","PULocationID":"48","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.65","tolls_amount":"0","total_amount":"15.95"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Eve_Erdman51@hotmail.com","mobile":"221.581.9132 x7964"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-31 02:25:57","tpep_pickup_datetime":"2023-05-13 02:11:55","tripID":"ad35aadc-0dcb-43f6-8140-47fc8f10d3ad","trip_distance":"2.20"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794505,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":953}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"229","PULocationID":"90","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"13","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2","tolls_amount":"0","total_amount":"16.3"},"passenger_count":"4","payment_type":"1","primary_passenger":{"email":"Ernestina_Corwin@gmail.com","mobile":"1-240-291-8146 x5567"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-24 02:43:12","tpep_pickup_datetime":"2023-01-12 02:27:06","tripID":"4eb66446-fd61-4410-b056-ffc4f512eb80","trip_distance":"2.90"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794509,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":957}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"170","PULocationID":"234","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"7.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.6","tolls_amount":"0","total_amount":"11.4"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Oscar.Witting@hotmail.com","mobile":"281-827-1431 x92694"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-08 02:22:54","tpep_pickup_datetime":"2023-05-25 02:14:07","tripID":"21963119-b955-47c7-8dda-d8804bfeadf7","trip_distance":"1.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794513,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":961}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"263","PULocationID":"162","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2","tolls_amount":"0","total_amount":"11.3"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Stone81@yahoo.com","mobile":"889-393-8174 x1107"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-27 02:34:52","tpep_pickup_datetime":"2023-07-23 02:28:20","tripID":"2b8b5deb-9b99-488f-9a66-bf07b19fd97f","trip_distance":"2.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794516,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":964}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"263","PULocationID":"237","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.45","tolls_amount":"0","total_amount":"8.75"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Winfield_Mayer42@yahoo.com","mobile":"(618) 838-8049"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-07 02:37:52","tpep_pickup_datetime":"2023-08-06 02:32:47","tripID":"e3ca466a-f2b9-4e65-84dd-66201b11a92f","trip_distance":"1.20"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794519,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":967}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"41","PULocationID":"237","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.35","tolls_amount":"0","total_amount":"14.65"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Jaeden.Von@gmail.com","mobile":"1-504-649-6832"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-25 02:58:27","tpep_pickup_datetime":"2023-03-23 02:48:45","tripID":"3ecddca9-4247-475a-bd93-c572db096fc4","trip_distance":"2.70"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794522,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":970}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"87","PULocationID":"66","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"11.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Rosella.Rolfson-Gibson@yahoo.com","mobile":"(617) 788-8313 x2277"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-19 02:15:18","tpep_pickup_datetime":"2023-03-25 02:05:08","tripID":"5c3d9658-0579-4365-82f3-f567c6fab6b6","trip_distance":"2.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794526,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":974}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"37","PULocationID":"87","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"26","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"5.45","tolls_amount":"0","total_amount":"32.75"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Pinkie17@yahoo.com","mobile":"206-799-5614"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-28 02:42:36","tpep_pickup_datetime":"2023-12-30 02:17:57","tripID":"f2213841-8cae-4c86-aa17-ab8150b0353b","trip_distance":"8.20"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794529,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":977}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"17","PULocationID":"37","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"7.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Carmine44@gmail.com","mobile":"369-639-0569 x643"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-29 02:50:36","tpep_pickup_datetime":"2024-01-16 02:43:48","tripID":"d5f74519-08c3-4ee1-9e2d-9b84876e157e","trip_distance":"1.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794532,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":980}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"107","PULocationID":"211","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"9.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Ardith.Tromp@hotmail.com","mobile":"612-805-1546 x7728"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-08 03:03:30","tpep_pickup_datetime":"2023-05-30 02:54:05","tripID":"4934ae36-14f0-46ea-8087-497b87aa6d5e","trip_distance":"1.90"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794536,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":984}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"140","PULocationID":"229","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.26","tolls_amount":"0","total_amount":"7.56"},"passenger_count":"5","payment_type":"1","primary_passenger":{"email":"Reinhold12@hotmail.com","mobile":"(478) 224-3991 x93298"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-15 02:16:51","tpep_pickup_datetime":"2023-01-01 02:13:30","tripID":"1d290a9e-1244-46ef-b22c-08336ea56864","trip_distance":"1.08"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794539,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":987}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"230","PULocationID":"161","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.32","tolls_amount":"0","total_amount":"11.62"},"passenger_count":"5","payment_type":"1","primary_passenger":{"email":"Eric29@yahoo.com","mobile":"774.435.3330 x4929"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-30 02:44:36","tpep_pickup_datetime":"2023-04-29 02:33:54","tripID":"4ef82abf-f75e-425b-a0d6-9b8d4521addb","trip_distance":"1.39"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794542,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":990}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"234","PULocationID":"230","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"9.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"10.8"},"passenger_count":"5","payment_type":"2","primary_passenger":{"email":"Rashad.Klocko@gmail.com","mobile":"1-902-813-8402 x18121"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-18 02:59:59","tpep_pickup_datetime":"2023-06-16 02:48:50","tripID":"cca51610-3623-4353-befe-465bcbcb6e5b","trip_distance":"1.91"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794545,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":993}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"107","PULocationID":"249","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.45","tolls_amount":"0","total_amount":"14.75"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Germaine.Johnson16@yahoo.com","mobile":"1-309-548-7894 x77988"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-05 02:55:40","tpep_pickup_datetime":"2023-09-28 02:41:47","tripID":"3f2e3f93-9088-4ba1-aad0-779c04192b96","trip_distance":"2.10"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794587,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1035}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"237","PULocationID":"151","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"11.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Taylor.Green@yahoo.com","mobile":"(571) 811-5436 x433"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-02-06 02:10:38","tpep_pickup_datetime":"2023-09-02 02:01:05","tripID":"1eb1e4c1-9c23-4dd4-8915-0986404ac3e8","trip_distance":"2.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794591,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1039}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"226","PULocationID":"230","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"17.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"18.8"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Keenan.Brown60@yahoo.com","mobile":"447-236-4948 x26679"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-05 02:44:31","tpep_pickup_datetime":"2023-02-10 02:23:48","tripID":"54287dc9-743e-40b3-a2e7-16c8139a405d","trip_distance":"4.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794594,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1042}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"48","PULocationID":"211","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"15.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.36","tolls_amount":"0","total_amount":"20.16"},"passenger_count":"6","payment_type":"1","primary_passenger":{"email":"Joana.Murray16@gmail.com","mobile":"1-205-355-1857 x6037"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-28 02:29:36","tpep_pickup_datetime":"2023-09-21 02:11:57","tripID":"cfc6db54-05f3-4ec3-9fac-35bc0642b5b0","trip_distance":"4.39"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794598,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1046}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"186","PULocationID":"48","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"7.3"},"passenger_count":"6","payment_type":"2","primary_passenger":{"email":"Pietro34@gmail.com","mobile":"(214) 940-1846"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-30 02:42:32","tpep_pickup_datetime":"2023-06-01 02:35:36","tripID":"40ddb784-9baa-49cc-b445-ba7c3c3a5350","trip_distance":".74"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794601,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1049}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"239","PULocationID":"186","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"15","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.44","tolls_amount":"0","total_amount":"18.74"},"passenger_count":"6","payment_type":"1","primary_passenger":{"email":"Hunter60@gmail.com","mobile":"1-605-974-7545 x2362"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-10-05 03:00:57","tpep_pickup_datetime":"2023-09-05 02:43:53","tripID":"1b84cc94-63cd-4bf7-a5a2-9adf220323de","trip_distance":"3.77"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794604,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1052}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"107","PULocationID":"231","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.36","tolls_amount":"0","total_amount":"14.16"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Berneice0@hotmail.com","mobile":"355.597.9064 x85306"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-12-02 02:15:56","tpep_pickup_datetime":"2023-09-05 02:04:15","tripID":"09cd6414-1451-4e47-903b-8e6020b85f0c","trip_distance":"2.59"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794607,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1055}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"237","PULocationID":"137","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"5","tolls_amount":"0","total_amount":"16.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Josephine_Thompson@hotmail.com","mobile":"(816) 899-1039 x9110"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-15 02:32:57","tpep_pickup_datetime":"2023-12-19 02:22:15","tripID":"0ea57d99-2441-4a96-aa30-d50601ae805e","trip_distance":"2.60"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794610,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1058}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"223","PULocationID":"170","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"29","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"7.21","tolls_amount":"5.76","total_amount":"43.27"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Blaze29@yahoo.com","mobile":"550-296-4027 x96854"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-22 03:14:31","tpep_pickup_datetime":"2023-08-11 02:47:39","tripID":"e7cd1818-3ac4-4c26-be7e-2811d3cf6eb9","trip_distance":"9.49"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794614,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1062}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"114","PULocationID":"79","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.7","tolls_amount":"0","total_amount":"10"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Gerda98@yahoo.com","mobile":"(771) 247-2009"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-10-09 02:17:42","tpep_pickup_datetime":"2023-04-14 02:11:46","tripID":"8f1d4aeb-d3a9-41f1-b869-dceba333ddb2","trip_distance":".90"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794617,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1065}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"114","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.85","tolls_amount":"0","total_amount":"11.15"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Lina78@hotmail.com","mobile":"(737) 801-0055 x389"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-10-05 02:28:10","tpep_pickup_datetime":"2023-02-02 02:18:49","tripID":"39948704-84d9-439f-a8f1-a041ec9d6882","trip_distance":"1.30"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794620,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1068}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"238","PULocationID":"79","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"23.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1","tolls_amount":"0","total_amount":"25.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Zachary.Kozey@hotmail.com","mobile":"1-367-977-9321 x10540"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-25 02:59:40","tpep_pickup_datetime":"2023-09-07 02:33:22","tripID":"edc121f9-b36f-4b24-afa9-b9fbadbfa297","trip_distance":"6.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794624,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1072}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"61","PULocationID":"162","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"28","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"7.01","tolls_amount":"5.76","total_amount":"42.07"},"passenger_count":"4","payment_type":"1","primary_passenger":{"email":"Ralph.Greenfelder@hotmail.com","mobile":"646.847.7460 x03211"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-16 02:35:52","tpep_pickup_datetime":"2023-03-09 02:10:17","tripID":"7f53bade-5ce2-4666-8c48-ed3af53255f4","trip_distance":"8.88"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794627,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1075}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"49","PULocationID":"49","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"5.3"},"passenger_count":"3","payment_type":"2","primary_passenger":{"email":"Jermaine.Homenick@yahoo.com","mobile":"264.624.0022 x756"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-05 02:41:11","tpep_pickup_datetime":"2023-04-22 02:39:34","tripID":"64d7eb34-3d39-45c5-9090-acf4b28d8c6f","trip_distance":".61"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794630,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1078}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"48","PULocationID":"148","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"14","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"3.06","tolls_amount":"0","total_amount":"18.36"},"passenger_count":"5","payment_type":"1","primary_passenger":{"email":"Annetta_Heller@hotmail.com","mobile":"(754) 546-3157"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-17 03:15:03","tpep_pickup_datetime":"2023-01-23 02:57:58","tripID":"4d482dfe-2fcf-49e3-8fa0-daa7b7f05655","trip_distance":"3.55"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794634,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1082}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"164","PULocationID":"230","RatecodeID":"5","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0","fare_amount":"20","improvement_surcharge":"0.3","mta_tax":"0","tip_amount":"0","tolls_amount":"0","total_amount":"20.3"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Juston25@hotmail.com","mobile":"638-267-0522 x2725"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-24 02:24:26","tpep_pickup_datetime":"2023-07-22 02:14:37","tripID":"8bd8e0b7-c3f9-4080-af62-7e6f7d62033b","trip_distance":"1.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794637,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1085}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"209","PULocationID":"234","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"16.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1","tolls_amount":"0","total_amount":"18.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Beulah_Reichel@gmail.com","mobile":"268.992.6069 x11480"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-22 02:46:20","tpep_pickup_datetime":"2024-01-02 02:30:49","tripID":"6be6e503-53d3-449e-9b6e-3a8ab535d512","trip_distance":"4.70"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794640,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1088}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"141","PULocationID":"249","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"12","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.66","tolls_amount":"0","total_amount":"15.96"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Agnes_Hansen@gmail.com","mobile":"(549) 807-8566 x7952"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-09-25 03:10:18","tpep_pickup_datetime":"2023-10-13 02:58:01","tripID":"c6d635dc-daf6-4c45-8e27-7fe0ca048dcc","trip_distance":"3.40"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794643,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1091}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"13","PULocationID":"170","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"14.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"15.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Judah_Johnson44@hotmail.com","mobile":"363-606-8572"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-29 02:21:18","tpep_pickup_datetime":"2023-09-18 02:05:14","tripID":"1412a958-bb92-41fe-932e-a5299a3fff0f","trip_distance":"3.96"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794687,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1135}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"79","PULocationID":"261","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"12","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"13.3"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Marisol.Beier-Miller@yahoo.com","mobile":"1-309-668-7163"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-28 02:38:04","tpep_pickup_datetime":"2023-11-07 02:23:14","tripID":"c0cdc45b-22e7-4ecc-a627-5c8b67d87e46","trip_distance":"2.59"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794690,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1138}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"42","PULocationID":"79","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"28.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.98","tolls_amount":"0","total_amount":"32.78"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Thalia_Ortiz88@yahoo.com","mobile":"407-555-4002 x44648"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-24 03:03:01","tpep_pickup_datetime":"2023-07-25 02:30:57","tripID":"e0b47894-d40d-42c2-8336-c8b9c4ed3a33","trip_distance":"8.43"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794694,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1142}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"246","PULocationID":"68","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"7","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.66","tolls_amount":"0","total_amount":"9.96"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Hans.Smitham9@yahoo.com","mobile":"(339) 364-0841"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-19 02:16:50","tpep_pickup_datetime":"2023-10-09 02:10:41","tripID":"cd5a37af-4c4f-4424-9c93-b1244ad04fd7","trip_distance":"1.43"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794697,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1145}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"68","PULocationID":"246","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"5.3"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Lourdes38@hotmail.com","mobile":"1-751-666-0619 x542"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-01-02 02:16:50","tpep_pickup_datetime":"2023-12-20 02:17:22","tripID":"1f8d2fa0-a798-48b8-a9a9-e20e316d5531","trip_distance":".66"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794701,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1149}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"140","PULocationID":"48","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"11.8"},"passenger_count":"2","payment_type":"2","primary_passenger":{"email":"Riley_Yundt-Turcotte@gmail.com","mobile":"220.269.1280"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-02-03 02:35:05","tpep_pickup_datetime":"2023-08-14 02:23:08","tripID":"3ce204d3-9cba-46c1-85a1-f21292758378","trip_distance":"2.41"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794704,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1152}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"141","PULocationID":"141","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"4.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.74","tolls_amount":"0","total_amount":"7.54"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Edward14@yahoo.com","mobile":"320-494-1809 x531"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-30 02:39:22","tpep_pickup_datetime":"2024-01-14 02:36:30","tripID":"cdd42be7-9a59-4059-b8f1-dc3dddfdc0a2","trip_distance":".65"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794707,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1155}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"236","PULocationID":"143","RatecodeID":"1","VendorID":"1","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.25","tolls_amount":"0","total_amount":"13.55"},"passenger_count":"2","payment_type":"1","primary_passenger":{"email":"Blaise_Leffler7@hotmail.com","mobile":"275-547-8169 x12135"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-26 02:27:43","tpep_pickup_datetime":"2023-04-19 02:17:48","tripID":"986787ac-976d-4148-911e-a502b40ed9f2","trip_distance":"2.50"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794710,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1158}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"239","PULocationID":"236","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"7.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.76","tolls_amount":"0","total_amount":"10.56"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Anastacio0@gmail.com","mobile":"592.584.9471 x84515"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-03-11 02:25:16","tpep_pickup_datetime":"2023-07-16 02:17:47","tripID":"9b42bfac-1876-470a-86df-4f1dcff160cd","trip_distance":"1.68"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794714,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1162}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"75","PULocationID":"142","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"11.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"12.8"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Brooklyn.Feest@yahoo.com","mobile":"538.970.5053 x607"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-04-26 02:44:12","tpep_pickup_datetime":"2023-09-26 02:31:06","tripID":"7cfc8967-0cbd-45cb-a5a8-a9e7cb05f407","trip_distance":"2.73"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794717,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1165}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"145","PULocationID":"236","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"14.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2","tolls_amount":"0","total_amount":"17.8"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Roberto.Lockman@hotmail.com","mobile":"409-514-9426 x587"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-06-18 03:04:58","tpep_pickup_datetime":"2023-11-02 02:50:32","tripID":"7255441b-919a-4cac-89b5-96f7ff21e0db","trip_distance":"4.12"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794722,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1170}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"163","PULocationID":"48","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"10","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"2.26","tolls_amount":"0","total_amount":"13.56"},"passenger_count":"3","payment_type":"1","primary_passenger":{"email":"Margarett.Sawayn@yahoo.com","mobile":"(866) 903-8125 x36548"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-15 02:17:06","tpep_pickup_datetime":"2023-06-22 02:03:24","tripID":"fd218127-8604-4a99-aa7b-51852fca4b2f","trip_distance":"1.24"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794725,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1173}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"239","PULocationID":"237","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"7.3"},"passenger_count":"3","payment_type":"2","primary_passenger":{"email":"Kaylah_Shields55@yahoo.com","mobile":"(924) 980-5772"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-08-19 02:26:50","tpep_pickup_datetime":"2023-11-17 02:22:43","tripID":"94df85f8-4b0a-45eb-a45a-ccc11dad11ab","trip_distance":"1.24"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794729,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1177}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"231","PULocationID":"158","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"7.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.76","tolls_amount":"0","total_amount":"10.56"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Alverta.Armstrong44@yahoo.com","mobile":"(878) 812-6613 x69201"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2024-01-20 02:21:36","tpep_pickup_datetime":"2023-01-20 02:13:30","tripID":"7b16e77f-898f-4793-879f-ccf0c85798d6","trip_distance":"1.37"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794732,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1180}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"107","PULocationID":"231","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"8.5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1.96","tolls_amount":"0","total_amount":"11.76"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Esperanza15@hotmail.com","mobile":"692-945-2420 x1931"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-11-15 02:31:22","tpep_pickup_datetime":"2023-01-02 02:22:45","tripID":"e11b4c6c-2c0d-46ee-ae04-0700d67c131f","trip_distance":"2.00"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794736,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1184}}} 
+{"dataset":"new-york-taxi-data","event":{"DOLocationID":"186","PULocationID":"107","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"6","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"1","tolls_amount":"0","total_amount":"8.3"},"passenger_count":"1","payment_type":"1","primary_passenger":{"email":"Casper.Romaguera87@gmail.com","mobile":"491-889-2259"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-07-04 02:44:09","tpep_pickup_datetime":"2024-01-13 02:37:12","tripID":"5d52db41-4838-4a12-876b-056abd939f62","trip_distance":".92"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524794740,"processingStartTime":1711524793552,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"44f1bd01-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524793552,"timespans":{"extractor":1188}}} +{"dataset":"new-york-taxi-data","event":{"DOLocationID":"170","PULocationID":"164","RatecodeID":"1","VendorID":"2","fare_details":{"congestion_surcharge":"","extra":"0.5","fare_amount":"5","improvement_surcharge":"0.3","mta_tax":"0.5","tip_amount":"0","tolls_amount":"0","total_amount":"6.3"},"passenger_count":"1","payment_type":"2","primary_passenger":{"email":"Eudora.Dickens@gmail.com","mobile":"(252) 559-0694 x683"},"store_and_fwd_flag":"N","tpep_dropoff_datetime":"2023-05-22 02:51:52","tpep_pickup_datetime":"2023-01-16 02:48:04","tripID":"35cc2967-ce3d-4dac-a75f-74347e277630","trip_distance":".82"},"obsrv_meta":{"flags":{"extractor":"success"},"prevProcessingTime":1711524803950,"processingStartTime":1711524803850,"source":{"meta":{"connector_type":"api","entry_source":"api","id":"","version":"1.0"},"trace_id":"4b1516a1-ec0c-11ee-be1c-fbcf66548089"},"syncts":1711524803850,"timespans":{"extractor":100}}} diff --git a/tests/sample_data/nyt_data_100.json.gz b/tests/sample_data/nyt_data_100.json.gz new file mode 100644 index 
0000000000000000000000000000000000000000..f6cd64609fc390927c770fd54f786ba0aa316681 GIT binary patch literal 10228 zcmVl~-v%dIbeObEtu)UVnj@=hV zbyu~BI~ZUBBqh=g|M$%T8lxdqt%g`l$ZCh>utjo6aM*`=GV|m)|NJ0^+i;U^Ke+s0 zlYSHr+udt%8}3&kzx{*%`XK#~HvGp^zutD?cD3F7_ow{9fR^9;m;dwljYRy$ zzlPh?ZDTsTiQn+Iw29l@yMOJ&E?vcR8&>O^{F%G$=KFM$Kgre2-LAV1yYCZ!X#V2& zx4XdqS2p_i=fa!qU4B6y`2E{fZ{F^*&x67&26~iZ|2=+xLv*d=@Y01 z_HWy+*Ec`CRqo$(8`eL)2~^|t2mku758j5Go5Zhmchw!=^+A5?!~VFrhAZQpMD{)&HC>2~#I{|Tf*A+pG`RCc}znA=y)7AdnL_M@of=He01VBonP0)$H?RsN0IqCfV zNB6MlZf}0}0`p(}&D*ef`gdX7zy9^F|Mkyj19FeOCxBGsCjhDD17s~6r8yOjXv%Oj z&w%6SY5POiY*s*jxZd8DAgPsUG}y*ju+N7i$UP(#|MNa1d$fIr zu@fmuE6B9Okc_S9r0T8ftaT<7APJ4e`5{RQhon+Idj-*ZFB{XQ!Ufcut=VFqCR9q=D%+O!T$ubE7%pSn* z^1vL9m^4c2(6ycL&@v{YCDFG02x`~KzE>@&ghfok3t-YN9h17QOqxQOv}df6wV1@y ziX{FDP)^p!ukThjf$Rt#!8D4PBcsWMB5O} zH{tp7+v~Jmg{x1uTjsIemnS_0y5XN~gVLyrd!r1Q3UtB|oped&gzi!gg%ckYPU&Oc z%T@`)j~6r9j%%huExOdpq@C0jO9LO**cT|AOJ}maj>)Rhzz6ofZO;TyRSQsR>}j=A zxkiuF&gTI1s~tbPtIgHNyY%{XD%Vc5j?0r2Ol_$Gm;BG3&?+3D_RCdM?W?9VTxH+K zr460NBqI~r4h326FoX0)^e%d75RBKQs%abLXUC;p`XJU<4q{6krrQ5f{?8cu6c9N% z_W4U#zX{FX!}{jhnXy5v=O^A-%hXaq1ths`bbD?pFSVb7q!nsEwI%-N(ID1=4BGTb zIN5qZsz(uwibBb*C!b8$Cnyrx7Z}9qr4w0@bxc;N(kRxPZxrj!gGoOpCcnGeZPL!- z7#u&epk7@ll>uq#;+{}He;CV9GXii=Vd1o$r8({_AK?-db|x4SzSZ1dl1JfU>V=KQ zBz~bNp^Qua0;O{46c%JHhb`bz;neVFJ{18}b6C~b6J-RZ(KE^jz6=|N#?{xM4RMvq zEv5%4EO8@eQD596RLy_PFq&0#DJRRO6Uy(Iti9Cu2%avgCQU+572v5FGry$3EPXT!vTio3_P;8BwWeS`c$#w*P21pnMf0y)e#4D7Wk7zYtbcXk z@>o_PmrPfQNV#tpQC-61%mZx;1UMq_rv(7 zh-~}|0CMp>7V3CxQRH(@)g9IW5}E_~n#81T2PW7=GOxxL=pj7@*IBs8S3k;d4I zd*f=BmwMQHpuD6>Skkb(%$v?ft!0~}Zjq$uBRWB<&EkicgqDg@i*0bRr25e>!DRi? 
zmn49dTM}7N7aXv99$ZrG;0I2Hj`@G zOe~ODIi>xXk<3;Oez2j_u*rPZ&Iry=)?xR$`DVKb*I`p8wE2ii0|-a(>?LLf*?=>x zFKKdFLc_9z+fnlGgOWSQ4)j8I!a)F0ihs;k^`xIeD9UoVA>v|Fd;BP9gt`}(_7PFP+*m`sVyMXzCf>O<>L1x zRJSiNr5TW!Qsm88HqRn505j*prT5w8MZBCCaBq&Ud8UtZdp18F7XUc#>6@}Op& z=ra!oCUmJE6-+`v=(4~-@smhx=!H{VCosgWHLW9$rDIHb0ZMD-;wi1Jq_oe|<^3DE zh@4n5<(M$9ur`@2nV;+)Zt|!9A~7sC8%O2R>6~wjg~p<);jxC1w6`MgmwHlVz@6q{ zIP0HUiTJklgi4^x!g?8{@KJYyv{5E@ou#hCW7SLWSXexd)s?y*Rp>PFdo_M_m)BSF>2$5iiruVl)d1} z*$b15RFHaQT3tXTHgfh0X$y-du%^;ZL>CmY)0>hRVR@#@5*R%#p=SW|o3wt|gyx&= zZpH7ty-kWnQn|)7|81LU6;@~kU6USc}~UrY#q0o=JVCf zI^1kGVnf%`5(jn)FtF28EVv}|UU(msEq2CO(+dlMizfDxUnGMV z7>>P_sFf@k7rewgRxO^%+RAyXDRh@~zIklzmINDh;>DLAz4&^LTK3bhxlLD}@4|I> zGlophB$T`s#v&71?WkrT3yjV|mUj@qPV7qd&;)$qDH08(kf0Z!vty+-*|D;ZOn(FS zmsYx%N_>`HV)COF&u48dpDnF0@qEgkN*CFa$FUJ^dUwF@Z?Gt30L@jWc6hT{lWdRwPzUzdiHlat? zxuW_9USd^(i=WBbN}s&as^qZR{CymlVB=0)Vq#X-&bGw-Cj8HKXGY9q^M$lRBamuV zml=2T-XF^~St9lV2^+NWY>Ah`{d}$u5U%+nNF5cT2 zD$Sist>v`K4=tp(e9nI4i?mLgtFQQ7ck9liUaE zZSnkxmC}!@*Pqj1e!L40VWURuGbz$=1#{NcHAwtcOv>NK5I(mfsGBgh)HrLHZ zS6{tMCL(o$2#`r^up-D9MAvs@l%x=KN$+D`g2XP~>lrG|o=e!AG+&<)n&;V-jC!vZ zswPf>^1HxK#;*C>b~9EnF<(1^5#IS4`z^b7kUeGEvp+vM$Y_6O?~XJ-^1Paw{KRr*JBmac@heU(Wx?%MPn-UvHML4`O~hX?a>?P(H*Ar*61!s(I+mI zuOBuW7>O;>HVaXO zR#mPda6Az)m+}M?u7f=uxhJC^8T-_Conb z@>AVtp6Vn!QY#C9oUD~!uhy%uNq%%s!cVv`n*Z0>9E53P#V|InJ`Q^l2HGSl%|Ku^ zJPAeU&}Tspii91vy~cXsj11YmucsJo46>{!n16%m7JnD5D(|8X?vzha3Fj-8)dpNA z>hSEWGQ5x0>HZ&#ue7tH%_W1vZ`i>DTkvgW9f1sLSa`lO;G&RJ zpJ{bZK~M+WT((Y&fSR_5vPZFkC-Kq2K?r$F3@WG9=FYcG zwigCeFs;tBpAwa&mnfRBcq*$aC$g?!CUQpcx;<0P9G9*?&wp#PO6veLo+V_=s|%}{ zS=t)C&yZ59F6n4>H*rpKz^@yu4c_rsMHoCvVX;kuz_r4qt{2hylwx#U3+18D{MPFZ zaOsdVFuz;D0jFmg`n*JYIpT|lqrQ00n#W#;JAQN@oErnC27l14E6T0~XrLwy(=eIM zI@x?AGO1hl?{%3^5=#^WC6i!U%}iEzPH@i|xz&WEtyRtx=ai zZ}J3l#x2P?=00vtne3GBM3iHkFwg%F<60 zTsY2(-UTBlPl5jvAS$Yi8LnT(nBKU?1zyxTt`Z+kY zY3z*BypPsx+TAw)_iht!%UcrzihprH<0;=qlRQL!JlncJ?E`BGFZHAgm);8u-b=TA z=fY*82L2g~E)oiAJ1bhPf-n+186#bU@+NbB7cRH>`)E~pAFWg=O=xm5J7bV^ZP~OZ 
zRZW;Neea8Od%H?*boOHnHkP=DmeMjNwXf+g)xN`IZb~O~nD!VYq%Rdb#w7ntooxkB zzW1^&JL9#)rQM=3zPF^xd(8PoKMohqWNoFopy(%ZHbg>o$;1Xup_Yf4?x zBhFhNTIF6{Xl?S8C3}hJstaj5+;1s=36la@fAkVDPsDhtcQQ$r1oVjx7t*aUNtl+R z$G+2jD2vx|{#v>C3u#q(Azc)&PmA$pY%$NH`Z05w$bOn`@=R9aSV)P6R~Jh8d?5`N z_x!sPYs7d*&-FZ$J!M_u2O|;nIMf~cD0{C3bS?;z#19}BO_8Dju+gfB#iK7fe~0Op zeu9Zu=_4WwsGMA2{u|AG4kr_*Xz^z3EdRdRx0X0M`pJd`+^q?)N?qK0Tf<@XFv$UX zDW_^B9Cw&m@AIgyR9Sgvvm0IlcQ}l0bx$Iq(w**1@}b0GhlYG!Q0k?FlCUyxuV^EC zzPZl~f}fKqEY3(_KS^=Nz-a!M_H8%mff_;(lpj)T-v#WU1|w}JobPnLgeDOZ zca$CazK@~9)|cXfU~mqc7m|x-uey%C{^pRZER%3*nKYwSvH;8Ji_7gM++K%G^T*xl z_NKcwBm7ly;qqCdlT&GpH8tqOVdFu1$eI1+dCy=}c~tAYQ>=&3r)Z?erz3*;2%=>^ zDYU|({W}m`(=@$=9JAZ3=PD#O;0!l{nNuEgGGGEM}gjp$`B$y(Abltg#X==s?zES|lZ zI`(=S*>XrWLryq9VtcP9uRb|5u)aG`(89(kKe0AZp%2U{9x0QQ=>tR_5`jRH? zoZn@V(D1?Utnfl7dnco410+G(cfw=L?#>->XW6D?N_NiB*x6<39?ar_N%LD2oHe_> z-fHVqV9vM~`R_Mx!safmn=jHTrp+#`y6dsHXzRGoB-MDMn8-S-bxj1O66%=EKCO+V zLEa8-IMjR>feeQ_6Wwi|0l-9#8sj*I{V+AG&9thaJ?_vEq?ZoHnlwxN^9RApYiTb{86{do5fHlg|7 zcdPH$>4Bi!d^&Hq=3qnKn~QtYAZv67Mj7!~l<3U+xko4WHdl%grY~d7aljB}S#r_n1=k!Xr1K33@|qAPQ8< z`ofnO3E|=?t*ac{LdpB*j8${LU43yzOKMJsk&kzYD<+IOi+pI6dUZis2}80b6~XVX zOBj$``6i7k&9LAEd8r=R{UmP_wlWF|iF-`jN8wuNMe40j&Pt@B)pNe|o`1uMi|4Vf zc1T+S8a?|eWJpgNacws@cWJ%7`X+q;{c1bHVWnxbcMU;Jj!@PBslkK` zrpRkPFHu8|5I!2t_QtB9eJdbj1NJWai;%OTC4R&J`!04R6E4s%vEHP`PiKAQbk^FR z!*cIKai;C&S$w#D6?>h;!U@v>w};c$Lioyzrhu!cAq>)t5UKOLM8GqCYQs4{@>5VrRemFGM# z!8%x~=Aa^-My6?qX549}ADeHwufyH<*Xz}$TrEMMb=kL)+c(I91E;AbuGS4BS+qms zj%B!!`CWBJxZ%`a?UeEc50MEu@#LgtLO zCcklR$#cr)m+8l}UN_%%*Kg7~?M8N)j84p@2?bJimeqiD;5;;sq^H$DAO{gVx+k2r zi#B+ZqKK}yg0!{5Var%eiO49lu^ovonZMCoeEuF)=I@oIwDx=x*~)?JY0%VjGTQIL z!th6~5Q5(^dhc(#`a6t1~Ys7td%b1Ne#(gM+sAcX6EwMfb|0<+Gj9#^BSmetR8A zk4?qG38za&-~Rt(fk+;;7v>dJwuVC*1|hNvA_R_kT6jR zkC0u?<^?24xpcc9qB^^uqRsQ9BXDXU%)~flJ@Nm1`H2?2z9q#%vG7S>*Yu}L0`x2_!VjhyA_s*5-mv1!p z2b~8$E~F>s>mgcj3>;yk?gar!;S6cfgYH6ywntZ5m)J(lUo>IyTvpd{+47Ahoi7&i zx3)5P4lMr|+SPU)%*Y(}Lx=j+h1Xsfay3q}X0IyP%XjqTJ?7D+eJ|E~R3~LW96(dLZ7d< 
zyH!@;mG8V!KRlXYx%e4uW#V2j58>d^btVa3Ehta)7%`(3BhS+m+}x&J^WF6- zt^Ig_v;+U@!eyH~n-9Gt)%jN*W;x29WHh4o3lgP=xS@AOBi{DC)JACA`#kt@BFMZY z@vT;gAauTL$2C7V)#5p|>3a%`;`QH6IKrvBk{P{a|F+`ib#v?Gn9PUBG9J01(^zp3{cf^K zZV=Q>sACTy@QmEYQi9Mc4Mqv047ZmCyl5jyL9uf#>D1{m813A#UJS;?)7aXmy%HD? zZ~V_#T|NhnpM^|f^@!6mGe}LsyPO^bHA@i`>X-zClfLWj7#k#q?dH3MCHBgNBt_!d zm`h`(tTKtb)(?q^t8`F8qn=v|OLFlcdsG>+SE8-+2QikyVIs zjY4W9nF^z=JV@PR3+0p%!F~&(_BB-D6Wy!Sk)N}k+eT~i)HNywqILXCxWMJnIYni+ z^%ofLEIw9`YGd`v9psembv`Wq)^)Mq;xmoK?Pk>lH?jmV=o?SEaTaU9sE;Cae?piZ z9_6n3NnCqu0e!HYuwJ%;d@>>=)eGDAsFHFqy5eEw{H{F=#>JCVS4&ch{sZS5=TwK! z?jti!!=syb;kRU878f#4q8VB zI?n;-N9%OY=t?^^;-p31HOvkhiB&D6889Z@vS{KK58w#&s6EC=w(sNcTYm zk5WY0nV?#mf+{U08=8NhLreGSAz$g$Q`$?O^O_Q>OQku{MawpRMjNfq+BRH$89IJk zM-sdoG-y?09CD#Vgt|!RK{4PjY4Vg(4Jd{zUVk5$I)U+BXL6EW5M-SfE-+51?|ZF% zm)gE0`#VTtvAVN(Pw)QQUOE&pFJ&8OdfoG5{n(C zpI`L4cn< zs(9#0ZhJxGxZ@I=8)~OFyTrmaIdh zYSf8zcHWA~s0`V-=kD!WexA$YA8&1A(KZrtS~Q`patP7K%V#_xj~0(3BYZNJCGI9| zV8Yu)BtcFo!H;DtFsN2qn=C(xWpvJMq)xDSM*{rCr8aecmB$I$%wtLp4mOA=S^_Vk!Zvx;MsLdvrkh>a z1BjBoPX z^Wt8bsw-%}?>6_Tw9}4v{&;SAv=;fvEhO?&XIs%xPL7Uk5Urw|N*fY(*tTFxdOY<4 zIs;gIq`R(+bXU4y7jnk=Pql~hY4{wy8hKWuxw;8yea!3{i3Kv!zPhm932l9yEbq`Z z5)RiO6FsIHwx0^$$7Dn$xyf{l#OzFft;$(?QS_lF?|a*WEMszhX9E7;#bXjH!}NX* z$>}?;8G|l=a|fU2NPoUs?Or#ZY&W~@L%CFX3k^DKkb65WmCyIQJ6Q6VJ(o2nWG`GaQD~23b?RHMN{-F8Q7?eXrAGtl%KUb~QrrQZu~b%q za__=_QnmEMkjTYov$qi-FoXslYtC^aMlQm*@*zElG<_TDG^S{f~MgHMP*ZwN1$vw)AtrdK=Oh#KZuSD zlj|70k`heoOVah1I4gn0C$;O!q;`d+B?+C+!UwAFN=|Gme@*nrBqqOG-QL|ae@l07 zNR4K8X(==^OpNsx_l5xL5-tx$cG{lc+-!$sXSOAHv@fA#x~MScZALDWk*llYUmPOz zKDGcMXe_ZX5wl|fpE5=~ojhJ}~F z{nZ!Y4gXNYm)q;j$a#s=zCo`VE|wJ; zU2q)YfS4tj!ebX7&#r6Z z*|F$Va6aFV>N%`xP9w5@K2ZLNL3s6TcfDd3iQ9a_d; zKKPQyWePrWQm5W@lAslR-#S5vL4*#y(5)lp+?sI2w%{e|<>EPPWk@@yl9T#56|%k# zmek18R6mM_9JrZ`n?t)>r8SP+MjPU4L<|!vE5@+z%E_s3M@o&yPsAbQPY#N)=vVHF8ep$=|J&`e(Pdt8~SG zNw3prrL+=XU07v=v*5D|&-sd>56I(5N#1=-!SiUkrRj+b&v#MVo|!9k*-NDNQnbE} z!X^o}^&pchi)0T1-+AG=^nz)yu3(NtkDSkpvU2EiY7X>_VY8p`-^*^b3C-WP>ybPl 
zWlV$CHD(V>MF2HbQx4-*dE)J*+NWEZCuZu-%Fdw6Is(kbur>;>RT5~l3#pI%noDNt zFVbK7r7tfHRch}_8PMlECIL9po`mMylYF$!f`Ts|!tJdZF>d})YoBW-a4VN%^fc<4 zJ1=vb{n!)Se#{Zb_#}DOV|2ZYkv~~xw3HdGmnfWeq*L#r2_?6mm$Sq>p|-lVe$P z6bRuHzn@-Xw!m`2M6Ru5xz7yzTng3M1g7)Y>vAZxpWTT|$)$UW7^?FWaivh`GdknJ zS=cPhOJ{d!yAHY0ydJ5T9__0OM<$C;;o_dEL_cmQm_&mP{g)lq4+?iwPvo&dnUIX_ uh3<3gY-+6tu64pCBQ^FEA(#RxnQYMfs9gFI)9C#Y^Zx@lnC@Qo literal 0 HcmV?d00001 diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py new file mode 100644 index 0000000..8a4af3f --- /dev/null +++ b/tests/test_batch_connector.py @@ -0,0 +1,126 @@ +import os +import unittest +import yaml +import pytest +from typing import Any, Dict +from testcontainers.postgres import PostgresContainer +from testcontainers.kafka import KafkaContainer +from kafka import KafkaConsumer, TopicPartition +from pyspark.sql import SparkSession, DataFrame +from pyspark.conf import SparkConf +import logging + +# from obsrv.common import ObsrvException +from obsrv.job.batch import get_base_conf +from obsrv.connector.batch import ISourceConnector, SourceConnector +from obsrv.connector import ConnectorContext +from obsrv.connector import MetricsCollector +# from obsrv.models import ErrorData, StatusCode + +from tests.create_tables import create_tables + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +@pytest.fixture(scope='session', autouse=True) +def setUp(request): + postgres = PostgresContainer("postgres:latest") + kafka = KafkaContainer("confluentinc/cp-kafka:latest") + + postgres.start() + kafka.start() + + with open(os.path.join(os.path.dirname(__file__), 'test_conf.yaml')) as config_file: + config = yaml.safe_load(config_file) + + config['connector-instance-id'] = 'test.new-york-taxi-data.1' + + config['postgres']['host'] = postgres.get_container_host_ip() + config['postgres']['port'] = postgres.get_exposed_port(5432) + config['postgres']['user'] = postgres.username + config['postgres']['password'] = 
postgres.password + config['postgres']['dbname'] = postgres.dbname + config['kafka']['bootstrap-servers'] = kafka.get_bootstrap_server() + + with open(os.path.join(os.path.dirname(__file__), 'config.yaml'), 'w') as config_file: + yaml.dump(config, config_file) + + create_tables(config) + + # clean up + def remove_container(): + import time + # logger.info("Waiting for 10 minutes before stopping the containers") + # time.sleep(600) + # input("Press Enter to continue...") + postgres.stop() + kafka.stop() + os.remove(os.path.join(os.path.dirname(__file__), 'config.yaml')) + + request.addfinalizer(remove_container) + + # yield (postgres, kafka) + +class TestSource(ISourceConnector): + def process(self, sc: SparkSession, ctx: ConnectorContext, connector_config: Dict[Any, Any], metrics_collector: MetricsCollector) -> DataFrame: + df = sc.read.format("json").load('tests/sample_data/nyt_data_100.json.gz') + logging.info("Dataframe: %s", df.count()) + yield df + + df1 = sc.read.format("json").load('tests/sample_data/nyt_data_100.json') + logging.info("Dataframe: %s", df1.count()) + yield df1 + + def get_spark_conf(self, connector_config) -> SparkConf: + conf = get_base_conf() + logging.info("Spark Config: %s", conf.getAll()) + return conf + +class TestBatchConnector(unittest.TestCase): + def test_source_connector(self): + connector = TestSource() + config_file_path = os.path.join(os.path.dirname(__file__), 'config.yaml') + + config = yaml.safe_load(open(config_file_path)) + + # print(config) + # from obsrv.utils.config import Config + # c = Config(config_file_path=config_file_path) + # print(c.find("kafka.bootstrap-servers")) + + self.assertEqual(os.path.exists(config_file_path), True) + + test_raw_topic = 'test.ingest' + test_metrics_topic = 'test.metrics' + + config = yaml.safe_load(open(config_file_path)) + + kafka_consumer = KafkaConsumer(bootstrap_servers=config['kafka']['bootstrap-servers'], group_id='test-group', enable_auto_commit=True) + + trt_consumer = 
TopicPartition(test_raw_topic, 0) + tmt_consumer = TopicPartition(test_metrics_topic, 0) + + kafka_consumer.assign([trt_consumer, tmt_consumer]) + # tmt_consumer = kafka_consumer.assign([tmt_consumer]) + + kafka_consumer.seek_to_beginning() + + + SourceConnector.process(connector=connector, config_file_path=config_file_path) + + metrics = [] + all_messages = kafka_consumer.poll(timeout_ms=10000) + # kafka_consumer.commit('test-group') + + for topic_partition, messages in all_messages.items(): + for message in messages: + logging.info("Infosys: %s", message.value) + if topic_partition.topic == test_metrics_topic: + metrics.append(message.value) + # commit offset + + logging.info("metrics post process: %s", metrics) + + assert kafka_consumer.end_offsets([trt_consumer]) == {trt_consumer: 200} + assert kafka_consumer.end_offsets([tmt_consumer]) == {tmt_consumer: 1} diff --git a/tests/test_conf.yaml b/tests/test_conf.yaml new file mode 100644 index 0000000..829323f --- /dev/null +++ b/tests/test_conf.yaml @@ -0,0 +1,21 @@ +postgres: + dbname: postgres + user: postgres + password: postgres + host: localhost + port: 5432 + +kafka: + bootstrap-servers: localhost:9092 + telemetry-topic: test.telemetry + connector-metrics-topic: test.metrics + producer: + compression: gzip + max-request-size: 1000000 # 1MB {1M: 1000000, 10M: 10000000, 5M: 5000000} + +obsrv_encryption_key: random_32_byte_encryption_string + +connector_instance_id: test.new-york-taxi-data.1 + +building-block: py-sdk-test +env: local \ No newline at end of file diff --git a/tests/test_connector_registry.py b/tests/test_connector_registry.py new file mode 100644 index 0000000..bda1148 --- /dev/null +++ b/tests/test_connector_registry.py @@ -0,0 +1,26 @@ +# import sys +# import logging +# import unittest +# from obsrv.connector.registry import ConnectorRegistry, ConnectorInstance + +# logger = logging.getLogger() +# logger.level = logging.INFO +# logger.addHandler(logging.StreamHandler(sys.stdout)) + +# class 
TestConnectorRegistry(unittest.TestCase): +# def setUp(self) -> None: +# self.connector_id = '1' +# self.connector_instance_id = 's3.new-york-taxi-data.1' +# self.connector_registry = ConnectorRegistry() + +# def test_get_connector_instances(self): +# connector_instances = self.connector_registry.get_connector_instances(self.connector_id) +# self.assertIsInstance(connector_instances, list) +# self.assertEqual(len(connector_instances), 1) + +# def test_get_connector_instance(self): +# connector_instance = ConnectorRegistry.get_connector_instance(self.connector_instance_id) +# self.assertIsInstance(connector_instance, ConnectorInstance) + +# # if __name__ == '__main__': # pragma: no cover +# # unittest.main() \ No newline at end of file diff --git a/tests/test_encryption_utils.py b/tests/test_encryption_utils.py new file mode 100644 index 0000000..ce54a92 --- /dev/null +++ b/tests/test_encryption_utils.py @@ -0,0 +1,36 @@ +# import sys +# import logging +# import unittest + +# from obsrv.utils import EncryptionUtil + +# logger = logging.getLogger() +# logger.level = logging.DEBUG +# logger.addHandler(logging.StreamHandler(sys.stdout)) + +# class TestEncryptionUtil(unittest.TestCase): +# def setUp(self): +# self.encryption_key = '5Gw743MySPvkcobvtVQoFJ0tUqAZ8TUw' +# self.encryption_util = EncryptionUtil(self.encryption_key) + +# def test_encrypt(self): +# plaintext = 'Hello, World!' +# encrypted_text = self.encryption_util.encrypt(plaintext) +# self.assertEqual(encrypted_text, 'tz3mCbuoi8dfMSuIPngERg==') +# self.assertNotEqual(plaintext, encrypted_text) + +# def test_decrypt(self): +# plaintext = 'Hello, World!' +# encrypted_text = self.encryption_util.encrypt(plaintext) +# decrypted_text = self.encryption_util.decrypt(encrypted_text) +# self.assertEqual(plaintext, decrypted_text) + +# def test_decrypt_wrong_key(self): +# plaintext = 'Hello, World!' 
+# encrypted_text = self.encryption_util.encrypt(plaintext) +# wrong_key_encryption_util = EncryptionUtil('ozfS4yogdS8opAsIO7bhPc5jkwoJ8wUy') +# with self.assertRaises(Exception): +# wrong_key_encryption_util.decrypt(encrypted_text) + +# if __name__ == '__main__': # pragma: no cover +# unittest.main() \ No newline at end of file diff --git a/tests/test_setup.py b/tests/test_setup.py new file mode 100644 index 0000000..f11fdb0 --- /dev/null +++ b/tests/test_setup.py @@ -0,0 +1,24 @@ +# import os +# import pytest +from testcontainers.postgres import PostgresContainer +from testcontainers.kafka import KafkaContainer +import yaml +# import psycopg2 + + +# def test_create_dataset(): +# with open(os.path.join(os.path.dirname(__file__), 'config.yaml'), 'r') as config_file: +# config = yaml.safe_load(config_file) +# conn = psycopg2.connect( +# host=config['postgres']['host'], +# port=config['postgres']['port'], +# user=config['postgres']['user'], +# password=config['postgres']['password'], +# dbname=config['postgres']['dbname'] +# ) + +# cur = conn.cursor() +# cur.execute("SELECT * FROM public.datasets;") +# result = cur.fetchone() + +# assert result[0] == 'new-york-taxi-data' \ No newline at end of file From 570c5c0f1deb27aa9d7c5e74dba735669eec07ad Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Thu, 25 Apr 2024 13:49:17 +0530 Subject: [PATCH 02/13] test case fixes (#2) * Sanketika-Obsrv/issue-tracker#128: python sdk init * Sanketika-Obsrv/issue-tracker#128: fix test cases --- obsrv/common/exception.py | 5 +- obsrv/connector/batch/obsrv_dataset.py | 2 - obsrv/connector/batch/source.py | 15 +---- obsrv/utils/db_util.py | 3 - obsrv/utils/encyption.py | 4 +- obsrv/utils/logger.py | 3 - tests/batch_setup.py | 45 +++++++++++++++ tests/config/config.template.yaml | 21 +++++++ tests/create_tables.py | 16 +++--- tests/test_batch_connector.py | 80 ++++---------------------- tests/test_connector_registry.py | 43 +++++++------- tests/test_encryption_utils.py | 54 +++++++---------- 12 
files changed, 138 insertions(+), 153 deletions(-) create mode 100644 tests/batch_setup.py create mode 100644 tests/config/config.template.yaml diff --git a/obsrv/common/exception.py b/obsrv/common/exception.py index 9e44e79..228f09c 100644 --- a/obsrv/common/exception.py +++ b/obsrv/common/exception.py @@ -1,12 +1,13 @@ -from logging import getLogger from obsrv.models import ErrorData +from obsrv.utils import LoggerController -logger = getLogger(__name__) +logger = LoggerController(__name__) class ObsrvException(Exception): def __init__(self, error): self.error = error super().__init__(self.error.error_msg) + logger.exception(f"exception called from {self.__class__.__name__} with error {self.error.error_code} - {self.error.error_msg}") # class UnsupportedDataFormatException(ObsrvException): # def __init__(self, data_format): diff --git a/obsrv/connector/batch/obsrv_dataset.py b/obsrv/connector/batch/obsrv_dataset.py index 03553e3..caee7dc 100644 --- a/obsrv/connector/batch/obsrv_dataset.py +++ b/obsrv/connector/batch/obsrv_dataset.py @@ -64,8 +64,6 @@ def save_to_kafka(self, config, topic): kafka_servers = config.find("kafka.bootstrap-servers", "localhost:9092") compression_type = config.find("kafka.producer.compression", "snappy") - logger.info("saving valid events to kafka topic `%s` on `%s`", topic, kafka_servers) - self.valid_events.selectExpr("to_json(struct(*)) AS value").write.format("kafka") \ .option("kafka.bootstrap.servers", kafka_servers) \ .option("kafka.compression.type", compression_type) \ diff --git a/obsrv/connector/batch/source.py b/obsrv/connector/batch/source.py index c5fdca0..387f1f0 100644 --- a/obsrv/connector/batch/source.py +++ b/obsrv/connector/batch/source.py @@ -11,6 +11,7 @@ from obsrv.connector.batch.obsrv_dataset import ObsrvDataset from abc import ABC, abstractmethod +import json logger = LoggerController(__name__) @@ -76,25 +77,16 @@ def process_connector(connector: ISourceConnector, ctx: ConnectorContext, connec results = 
connector.execute(ctx=ctx, connector_config=connector_config, sc=sc, metrics_collector=metrics_collector) if isinstance(results, DataFrame): - logger.info("processing single dataframe with %s records", results.count()) res = SourceConnector.process_result(results, ctx, config) valid_records += res[0] failed_records += res[1] framework_exec_time += res[2] else: - # if isinstance(results, Iterator): - logger.info("processing iterator in process_connector") for result in results: - logger.info("processing dataframe from generator with %s records", result.count()) res = SourceConnector.process_result(result, ctx, config) valid_records += res[0] failed_records += res[1] framework_exec_time += res[2] - # else: - # res = SourceConnector.process_result(results, ctx, config) - # valid_records += res[0] - # failed_records += res[1] - # framework_exec_time += res[2] return ExecutionMetric( totalRecords=valid_records + failed_records, @@ -111,8 +103,6 @@ def process_result(result, ctx, config): dataset = ObsrvDataset(result) dataset.append_obsrv_meta(ctx) - logger.info("Total records post filter: %s", dataset.ds.count()) - dataset.filter_events(ctx, config) failed_events = dataset.invalid_events valid_events = dataset.valid_events @@ -143,7 +133,7 @@ def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): connector_config = SourceConnector.get_connector_config(connector_instance) if 'is_encrypted' in connector_config and connector_config['is_encrypted']: encryption_util = EncryptionUtil(config.find("obsrv_encryption_key")) - connector_config = encryption_util.decrypt(connector_config) + connector_config = json.loads(encryption_util.decrypt(connector_config['connector_config'])) metrics_collector = MetricsCollector(ctx) sc = SourceConnector.get_spark_session(ctx, connector_config, connector.get_spark_conf(connector_config)) @@ -161,7 +151,6 @@ def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): 
frameworkExecTime=execution_metric.frameworkExecTime + end_time - start_time, totalExecTime=end_time - start_time ) - logger.info("Metrics: %s", metric_event.to_json()) metrics_collector.collect(metric=metric_event.to_json()) except Exception as e: diff --git a/obsrv/utils/db_util.py b/obsrv/utils/db_util.py index 8b9ad91..7fb2e0c 100644 --- a/obsrv/utils/db_util.py +++ b/obsrv/utils/db_util.py @@ -1,14 +1,11 @@ import psycopg2 import psycopg2.extras -import logging class PostgresConnect: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) def connect(self): - # TODO: Read the values from config file db_host = self.config.get("host") db_port = self.config.get("port") db_user = self.config.get("user") diff --git a/obsrv/utils/encyption.py b/obsrv/utils/encyption.py index 2b39343..e43ba9f 100644 --- a/obsrv/utils/encyption.py +++ b/obsrv/utils/encyption.py @@ -9,12 +9,12 @@ def __init__(self, encryption_key): self.mode = AES.MODE_ECB self.block_size = AES.block_size - def encrypt(self, value): + def encrypt(self, value: str): cipher = self.algorithm.new(self.key, self.mode) padded_value = pad(value.encode('utf-8'), self.block_size) return b64encode(cipher.encrypt(padded_value)).decode('utf-8') - def decrypt(self, value): + def decrypt(self, value: str) -> str: cipher = self.algorithm.new(self.key, self.mode) decrypted_value64 = b64decode(value) decrypted_byte_value = unpad(cipher.decrypt(decrypted_value64), self.block_size) diff --git a/obsrv/utils/logger.py b/obsrv/utils/logger.py index b7548a8..37fa577 100644 --- a/obsrv/utils/logger.py +++ b/obsrv/utils/logger.py @@ -1,9 +1,6 @@ import logging from sys import stdout -# logging.basicConfig(stream=stdout, format='%(asctime)s %(levelname)s :%(message)s') -# logger = logging.getLogger(__name__) -# logger.setLevel(logging.INFO) class LoggerController(logging.Logger): def __init__(self, name): super().__init__(name) diff --git a/tests/batch_setup.py b/tests/batch_setup.py new 
file mode 100644 index 0000000..639b701 --- /dev/null +++ b/tests/batch_setup.py @@ -0,0 +1,45 @@ +import os +import pytest +from testcontainers.postgres import PostgresContainer +from testcontainers.kafka import KafkaContainer +import yaml + +from tests.create_tables import create_tables +# import psycopg2 + +@pytest.fixture(scope='session', autouse=True) +def setup_obsrv_database(request): + postgres = PostgresContainer("postgres:latest") + kafka = KafkaContainer("confluentinc/cp-kafka:latest") + + postgres.start() + kafka.start() + + with open(os.path.join(os.path.dirname(__file__), 'config/config.template.yaml')) as config_file: + config = yaml.safe_load(config_file) + + config['connector-instance-id'] = 'test.new-york-taxi-data.1' + + config['postgres']['host'] = postgres.get_container_host_ip() + config['postgres']['port'] = postgres.get_exposed_port(5432) + config['postgres']['user'] = postgres.username + config['postgres']['password'] = postgres.password + config['postgres']['dbname'] = postgres.dbname + config['kafka']['bootstrap-servers'] = kafka.get_bootstrap_server() + + with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml'), 'w') as config_file: + yaml.dump(config, config_file) + + create_tables(config) + + # clean up + def remove_container(): + postgres.stop() + kafka.stop() + try: + os.remove(os.path.join(os.path.dirname(__file__), 'config/config.yaml')) + except FileNotFoundError: + print("config file already removed") + pass + + request.addfinalizer(remove_container) \ No newline at end of file diff --git a/tests/config/config.template.yaml b/tests/config/config.template.yaml new file mode 100644 index 0000000..829323f --- /dev/null +++ b/tests/config/config.template.yaml @@ -0,0 +1,21 @@ +postgres: + dbname: postgres + user: postgres + password: postgres + host: localhost + port: 5432 + +kafka: + bootstrap-servers: localhost:9092 + telemetry-topic: test.telemetry + connector-metrics-topic: test.metrics + producer: + compression: 
gzip + max-request-size: 1000000 # 1MB {1M: 1000000, 10M: 10000000, 5M: 5000000} + +obsrv_encryption_key: random_32_byte_encryption_string + +connector_instance_id: test.new-york-taxi-data.1 + +building-block: py-sdk-test +env: local \ No newline at end of file diff --git a/tests/create_tables.py b/tests/create_tables.py index 4c7e562..ac167ff 100644 --- a/tests/create_tables.py +++ b/tests/create_tables.py @@ -1,10 +1,11 @@ import yaml import os import psycopg2 +import json from obsrv.utils import EncryptionUtil def create_tables(config): - enc = EncryptionUtil(config['obsrv-encryption-key']) + enc = EncryptionUtil(config['obsrv_encryption_key']) datasets = """ CREATE TABLE IF NOT EXISTS datasets ( @@ -88,18 +89,17 @@ def create_tables(config): ('test.1', '1', 'source', 'object', 'test_reader', 'test_reader', 'Python', 'Apache 2.0', 'ravi@obsrv.ai', 'http://localhost', 'Live', 'SYSTEM', 'SYSTEM', now()); """ - connector_config = {"type":"local"} - enc_config = enc.encrypt(connector_config) + connector_config = json.dumps({"type":"local"}) + enc_config = {"is_encrypted": True, "connector_config": enc.encrypt(connector_config)} - enc ins_ci = """ INSERT INTO connector_instances (id, dataset_id, connector_id, connector_type, connector_config, operations_config, status, connector_state, connector_stats, created_by, updated_by, created_date, updated_date, published_date) VALUES - ('test.new-york-taxi-data.1', 'new-york-taxi-data', 'test.1', 'source', '{"type":"local"}', '{}', 'Live', '{}', '{}', 'SYSTEM', 'SYSTEM', now(), now(), now() + ('test.new-york-taxi-data.1', 'new-york-taxi-data', 'test.1', 'source', %s, '{}', 'Live', '{}', '{}', 'SYSTEM', 'SYSTEM', now(), now(), now() ); """ - with open(os.path.join(os.path.dirname(__file__), 'config.yaml'), 'r') as config_file: + with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml'), 'r') as config_file: config = yaml.safe_load(config_file) conn = psycopg2.connect( host=config['postgres']['host'], @@ -110,12 
+110,14 @@ def create_tables(config): ) cur = conn.cursor() + cur.execute(datasets) cur.execute(connector_registry) cur.execute(connector_instances) cur.execute(indexes) cur.execute(ins_ds) cur.execute(ins_cr) - cur.execute(ins_ci) + cur.execute(ins_ci, (json.dumps(enc_config),)) + conn.commit() conn.close() \ No newline at end of file diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py index 8a4af3f..67a6b5b 100644 --- a/tests/test_batch_connector.py +++ b/tests/test_batch_connector.py @@ -8,7 +8,6 @@ from kafka import KafkaConsumer, TopicPartition from pyspark.sql import SparkSession, DataFrame from pyspark.conf import SparkConf -import logging # from obsrv.common import ObsrvException from obsrv.job.batch import get_base_conf @@ -18,109 +17,54 @@ # from obsrv.models import ErrorData, StatusCode from tests.create_tables import create_tables +from tests.batch_setup import setup_obsrv_database -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -@pytest.fixture(scope='session', autouse=True) -def setUp(request): - postgres = PostgresContainer("postgres:latest") - kafka = KafkaContainer("confluentinc/cp-kafka:latest") - - postgres.start() - kafka.start() - - with open(os.path.join(os.path.dirname(__file__), 'test_conf.yaml')) as config_file: - config = yaml.safe_load(config_file) - - config['connector-instance-id'] = 'test.new-york-taxi-data.1' - - config['postgres']['host'] = postgres.get_container_host_ip() - config['postgres']['port'] = postgres.get_exposed_port(5432) - config['postgres']['user'] = postgres.username - config['postgres']['password'] = postgres.password - config['postgres']['dbname'] = postgres.dbname - config['kafka']['bootstrap-servers'] = kafka.get_bootstrap_server() - - with open(os.path.join(os.path.dirname(__file__), 'config.yaml'), 'w') as config_file: - yaml.dump(config, config_file) - - create_tables(config) - - # clean up - def remove_container(): - import time - # logger.info("Waiting 
for 10 minutes before stopping the containers") - # time.sleep(600) - # input("Press Enter to continue...") - postgres.stop() - kafka.stop() - os.remove(os.path.join(os.path.dirname(__file__), 'config.yaml')) - - request.addfinalizer(remove_container) - - # yield (postgres, kafka) - class TestSource(ISourceConnector): def process(self, sc: SparkSession, ctx: ConnectorContext, connector_config: Dict[Any, Any], metrics_collector: MetricsCollector) -> DataFrame: df = sc.read.format("json").load('tests/sample_data/nyt_data_100.json.gz') - logging.info("Dataframe: %s", df.count()) yield df df1 = sc.read.format("json").load('tests/sample_data/nyt_data_100.json') - logging.info("Dataframe: %s", df1.count()) + yield df1 def get_spark_conf(self, connector_config) -> SparkConf: conf = get_base_conf() - logging.info("Spark Config: %s", conf.getAll()) return conf +@pytest.mark.usefixtures("setup_obsrv_database") class TestBatchConnector(unittest.TestCase): def test_source_connector(self): + connector = TestSource() - config_file_path = os.path.join(os.path.dirname(__file__), 'config.yaml') + config_file_path = os.path.join(os.path.dirname(__file__), 'config/config.yaml') config = yaml.safe_load(open(config_file_path)) - # print(config) - # from obsrv.utils.config import Config - # c = Config(config_file_path=config_file_path) - # print(c.find("kafka.bootstrap-servers")) - self.assertEqual(os.path.exists(config_file_path), True) test_raw_topic = 'test.ingest' test_metrics_topic = 'test.metrics' - config = yaml.safe_load(open(config_file_path)) - kafka_consumer = KafkaConsumer(bootstrap_servers=config['kafka']['bootstrap-servers'], group_id='test-group', enable_auto_commit=True) trt_consumer = TopicPartition(test_raw_topic, 0) tmt_consumer = TopicPartition(test_metrics_topic, 0) kafka_consumer.assign([trt_consumer, tmt_consumer]) - # tmt_consumer = kafka_consumer.assign([tmt_consumer]) - - kafka_consumer.seek_to_beginning() + # kafka_consumer.seek_to_beginning() 
SourceConnector.process(connector=connector, config_file_path=config_file_path) - metrics = [] - all_messages = kafka_consumer.poll(timeout_ms=10000) - # kafka_consumer.commit('test-group') - - for topic_partition, messages in all_messages.items(): - for message in messages: - logging.info("Infosys: %s", message.value) - if topic_partition.topic == test_metrics_topic: - metrics.append(message.value) - # commit offset + # metrics = [] + # all_messages = kafka_consumer.poll(timeout_ms=10000) - logging.info("metrics post process: %s", metrics) + # for topic_partition, messages in all_messages.items(): + # for message in messages: + # if topic_partition.topic == test_metrics_topic: + # metrics.append(message.value) assert kafka_consumer.end_offsets([trt_consumer]) == {trt_consumer: 200} assert kafka_consumer.end_offsets([tmt_consumer]) == {tmt_consumer: 1} diff --git a/tests/test_connector_registry.py b/tests/test_connector_registry.py index bda1148..75a5da5 100644 --- a/tests/test_connector_registry.py +++ b/tests/test_connector_registry.py @@ -1,26 +1,27 @@ -# import sys -# import logging -# import unittest -# from obsrv.connector.registry import ConnectorRegistry, ConnectorInstance +import unittest +import pytest +import os +import yaml +from obsrv.connector.registry import ConnectorRegistry, ConnectorInstance -# logger = logging.getLogger() -# logger.level = logging.INFO -# logger.addHandler(logging.StreamHandler(sys.stdout)) +from tests.batch_setup import setup_obsrv_database -# class TestConnectorRegistry(unittest.TestCase): -# def setUp(self) -> None: -# self.connector_id = '1' -# self.connector_instance_id = 's3.new-york-taxi-data.1' -# self.connector_registry = ConnectorRegistry() +@pytest.mark.usefixtures("setup_obsrv_database") +class TestConnectorRegistry(unittest.TestCase): + def setUp(self) -> None: + self.connector_id = 'test.1' + self.connector_instance_id = 'test.new-york-taxi-data.1' + self.connector_registry = ConnectorRegistry() -# def 
test_get_connector_instances(self): -# connector_instances = self.connector_registry.get_connector_instances(self.connector_id) -# self.assertIsInstance(connector_instances, list) -# self.assertEqual(len(connector_instances), 1) + with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml')) as config_file: + config = yaml.safe_load(config_file) + self.postgres_config = config['postgres'] -# def test_get_connector_instance(self): -# connector_instance = ConnectorRegistry.get_connector_instance(self.connector_instance_id) -# self.assertIsInstance(connector_instance, ConnectorInstance) + def test_get_connector_instances(self): + connector_instances = self.connector_registry.get_connector_instances(self.connector_id, self.postgres_config) + self.assertIsInstance(connector_instances, list) + self.assertEqual(len(connector_instances), 1) -# # if __name__ == '__main__': # pragma: no cover -# # unittest.main() \ No newline at end of file + def test_get_connector_instance(self): + connector_instance = ConnectorRegistry.get_connector_instance(self.connector_instance_id, self.postgres_config) + self.assertIsInstance(connector_instance, ConnectorInstance) diff --git a/tests/test_encryption_utils.py b/tests/test_encryption_utils.py index ce54a92..d9bf0d8 100644 --- a/tests/test_encryption_utils.py +++ b/tests/test_encryption_utils.py @@ -1,36 +1,26 @@ -# import sys -# import logging -# import unittest +import unittest +from obsrv.utils import EncryptionUtil -# from obsrv.utils import EncryptionUtil +class TestEncryptionUtil(unittest.TestCase): + def setUp(self): + self.encryption_key = '5Gw743MySPvkcobvtVQoFJ0tUqAZ8TUw' + self.encryption_util = EncryptionUtil(self.encryption_key) -# logger = logging.getLogger() -# logger.level = logging.DEBUG -# logger.addHandler(logging.StreamHandler(sys.stdout)) + def test_encrypt(self): + plaintext = 'Hello, World!' 
+ encrypted_text = self.encryption_util.encrypt(plaintext) + self.assertEqual(encrypted_text, 'tz3mCbuoi8dfMSuIPngERg==') + self.assertNotEqual(plaintext, encrypted_text) -# class TestEncryptionUtil(unittest.TestCase): -# def setUp(self): -# self.encryption_key = '5Gw743MySPvkcobvtVQoFJ0tUqAZ8TUw' -# self.encryption_util = EncryptionUtil(self.encryption_key) + def test_decrypt(self): + plaintext = 'Hello, World!' + encrypted_text = self.encryption_util.encrypt(plaintext) + decrypted_text = self.encryption_util.decrypt(encrypted_text) + self.assertEqual(plaintext, decrypted_text) -# def test_encrypt(self): -# plaintext = 'Hello, World!' -# encrypted_text = self.encryption_util.encrypt(plaintext) -# self.assertEqual(encrypted_text, 'tz3mCbuoi8dfMSuIPngERg==') -# self.assertNotEqual(plaintext, encrypted_text) - -# def test_decrypt(self): -# plaintext = 'Hello, World!' -# encrypted_text = self.encryption_util.encrypt(plaintext) -# decrypted_text = self.encryption_util.decrypt(encrypted_text) -# self.assertEqual(plaintext, decrypted_text) - -# def test_decrypt_wrong_key(self): -# plaintext = 'Hello, World!' -# encrypted_text = self.encryption_util.encrypt(plaintext) -# wrong_key_encryption_util = EncryptionUtil('ozfS4yogdS8opAsIO7bhPc5jkwoJ8wUy') -# with self.assertRaises(Exception): -# wrong_key_encryption_util.decrypt(encrypted_text) - -# if __name__ == '__main__': # pragma: no cover -# unittest.main() \ No newline at end of file + def test_decrypt_wrong_key(self): + plaintext = 'Hello, World!' 
+ encrypted_text = self.encryption_util.encrypt(plaintext) + wrong_key_encryption_util = EncryptionUtil('ozfS4yogdS8opAsIO7bhPc5jkwoJ8wUy') + with self.assertRaises(Exception): + wrong_key_encryption_util.decrypt(encrypted_text) From da22cf5cd37d3dd0d4a5fdd0a8292427f50beb3d Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Thu, 25 Apr 2024 17:05:50 +0530 Subject: [PATCH 03/13] Sanketika-Obsrv/issue-tracker#128: release version --- poetry.lock | 165 ++++++++++++++++++++++++++++--------------------- pyproject.toml | 13 +++- 2 files changed, 106 insertions(+), 72 deletions(-) diff --git a/poetry.lock b/poetry.lock index b507ff8..5eb0268 100644 --- a/poetry.lock +++ b/poetry.lock @@ -123,63 +123,63 @@ files = [ [[package]] name = "coverage" -version = "7.4.3" +version = "7.5.0" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8580b827d4746d47294c0e0b92854c85a92c2227927433998f0d3320ae8a71b6"}, - {file = "coverage-7.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:718187eeb9849fc6cc23e0d9b092bc2348821c5e1a901c9f8975df0bc785bfd4"}, - {file = "coverage-7.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:767b35c3a246bcb55b8044fd3a43b8cd553dd1f9f2c1eeb87a302b1f8daa0524"}, - {file = "coverage-7.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae7f19afe0cce50039e2c782bff379c7e347cba335429678450b8fe81c4ef96d"}, - {file = "coverage-7.4.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba3a8aaed13770e970b3df46980cb068d1c24af1a1968b7818b69af8c4347efb"}, - {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ee866acc0861caebb4f2ab79f0b94dbfbdbfadc19f82e6e9c93930f74e11d7a0"}, - {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:506edb1dd49e13a2d4cac6a5173317b82a23c9d6e8df63efb4f0380de0fbccbc"}, - {file = "coverage-7.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd6545d97c98a192c5ac995d21c894b581f1fd14cf389be90724d21808b657e2"}, - {file = "coverage-7.4.3-cp310-cp310-win32.whl", hash = "sha256:f6a09b360d67e589236a44f0c39218a8efba2593b6abdccc300a8862cffc2f94"}, - {file = "coverage-7.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:18d90523ce7553dd0b7e23cbb28865db23cddfd683a38fb224115f7826de78d0"}, - {file = "coverage-7.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cbbe5e739d45a52f3200a771c6d2c7acf89eb2524890a4a3aa1a7fa0695d2a47"}, - {file = "coverage-7.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:489763b2d037b164846ebac0cbd368b8a4ca56385c4090807ff9fad817de4113"}, - {file = "coverage-7.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:451f433ad901b3bb00184d83fd83d135fb682d780b38af7944c9faeecb1e0bfe"}, - {file = "coverage-7.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fcc66e222cf4c719fe7722a403888b1f5e1682d1679bd780e2b26c18bb648cdc"}, - {file = "coverage-7.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ec74cfef2d985e145baae90d9b1b32f85e1741b04cd967aaf9cfa84c1334f3"}, - {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:abbbd8093c5229c72d4c2926afaee0e6e3140de69d5dcd918b2921f2f0c8baba"}, - {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:35eb581efdacf7b7422af677b92170da4ef34500467381e805944a3201df2079"}, - {file = "coverage-7.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8249b1c7334be8f8c3abcaaa996e1e4927b0e5a23b65f5bf6cfe3180d8ca7840"}, - {file = "coverage-7.4.3-cp311-cp311-win32.whl", hash = "sha256:cf30900aa1ba595312ae41978b95e256e419d8a823af79ce670835409fc02ad3"}, - {file = "coverage-7.4.3-cp311-cp311-win_amd64.whl", 
hash = "sha256:18c7320695c949de11a351742ee001849912fd57e62a706d83dfc1581897fa2e"}, - {file = "coverage-7.4.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b51bfc348925e92a9bd9b2e48dad13431b57011fd1038f08316e6bf1df107d10"}, - {file = "coverage-7.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d6cdecaedea1ea9e033d8adf6a0ab11107b49571bbb9737175444cea6eb72328"}, - {file = "coverage-7.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b2eccb883368f9e972e216c7b4c7c06cabda925b5f06dde0650281cb7666a30"}, - {file = "coverage-7.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c00cdc8fa4e50e1cc1f941a7f2e3e0f26cb2a1233c9696f26963ff58445bac7"}, - {file = "coverage-7.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9a4a8dd3dcf4cbd3165737358e4d7dfbd9d59902ad11e3b15eebb6393b0446e"}, - {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:062b0a75d9261e2f9c6d071753f7eef0fc9caf3a2c82d36d76667ba7b6470003"}, - {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ebe7c9e67a2d15fa97b77ea6571ce5e1e1f6b0db71d1d5e96f8d2bf134303c1d"}, - {file = "coverage-7.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c0a120238dd71c68484f02562f6d446d736adcc6ca0993712289b102705a9a3a"}, - {file = "coverage-7.4.3-cp312-cp312-win32.whl", hash = "sha256:37389611ba54fd6d278fde86eb2c013c8e50232e38f5c68235d09d0a3f8aa352"}, - {file = "coverage-7.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:d25b937a5d9ffa857d41be042b4238dd61db888533b53bc76dc082cb5a15e914"}, - {file = "coverage-7.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:28ca2098939eabab044ad68850aac8f8db6bf0b29bc7f2887d05889b17346454"}, - {file = "coverage-7.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:280459f0a03cecbe8800786cdc23067a8fc64c0bd51dc614008d9c36e1659d7e"}, - {file = 
"coverage-7.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c0cdedd3500e0511eac1517bf560149764b7d8e65cb800d8bf1c63ebf39edd2"}, - {file = "coverage-7.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a9babb9466fe1da12417a4aed923e90124a534736de6201794a3aea9d98484e"}, - {file = "coverage-7.4.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dec9de46a33cf2dd87a5254af095a409ea3bf952d85ad339751e7de6d962cde6"}, - {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:16bae383a9cc5abab9bb05c10a3e5a52e0a788325dc9ba8499e821885928968c"}, - {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2c854ce44e1ee31bda4e318af1dbcfc929026d12c5ed030095ad98197eeeaed0"}, - {file = "coverage-7.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ce8c50520f57ec57aa21a63ea4f325c7b657386b3f02ccaedeccf9ebe27686e1"}, - {file = "coverage-7.4.3-cp38-cp38-win32.whl", hash = "sha256:708a3369dcf055c00ddeeaa2b20f0dd1ce664eeabde6623e516c5228b753654f"}, - {file = "coverage-7.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:1bf25fbca0c8d121a3e92a2a0555c7e5bc981aee5c3fdaf4bb7809f410f696b9"}, - {file = "coverage-7.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b253094dbe1b431d3a4ac2f053b6d7ede2664ac559705a704f621742e034f1f"}, - {file = "coverage-7.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77fbfc5720cceac9c200054b9fab50cb2a7d79660609200ab83f5db96162d20c"}, - {file = "coverage-7.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6679060424faa9c11808598504c3ab472de4531c571ab2befa32f4971835788e"}, - {file = "coverage-7.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4af154d617c875b52651dd8dd17a31270c495082f3d55f6128e7629658d63765"}, - {file = 
"coverage-7.4.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8640f1fde5e1b8e3439fe482cdc2b0bb6c329f4bb161927c28d2e8879c6029ee"}, - {file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:69b9f6f66c0af29642e73a520b6fed25ff9fd69a25975ebe6acb297234eda501"}, - {file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0842571634f39016a6c03e9d4aba502be652a6e4455fadb73cd3a3a49173e38f"}, - {file = "coverage-7.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a78ed23b08e8ab524551f52953a8a05d61c3a760781762aac49f8de6eede8c45"}, - {file = "coverage-7.4.3-cp39-cp39-win32.whl", hash = "sha256:c0524de3ff096e15fcbfe8f056fdb4ea0bf497d584454f344d59fce069d3e6e9"}, - {file = "coverage-7.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:0209a6369ccce576b43bb227dc8322d8ef9e323d089c6f3f26a597b09cb4d2aa"}, - {file = "coverage-7.4.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:7cbde573904625509a3f37b6fecea974e363460b556a627c60dc2f47e2fffa51"}, - {file = "coverage-7.4.3.tar.gz", hash = "sha256:276f6077a5c61447a48d133ed13e759c09e62aff0dc84274a68dc18660104d52"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, + {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = "sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, + {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, + {file = 
"coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, + {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = "sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, + {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, + {file = "coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, + {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, + 
{file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, + {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = "sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, + {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, + {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, + {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, + {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, + {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, ] [package.extras] @@ -206,6 +206,20 @@ urllib3 = ">=1.26.0" ssh = ["paramiko (>=2.4.3)"] websockets = ["websocket-client (>=1.3.0)"] +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = 
"sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "idna" version = "3.7" @@ -229,26 +243,23 @@ files = [ ] [[package]] -name = "kafka-python" -version = "2.0.3.dev0" +name = "kafka-python-ng" +version = "2.2.2" description = "Pure Python client for Apache Kafka" optional = false -python-versions = "*" -files = [] -develop = false +python-versions = ">=3.8" +files = [ + {file = "kafka-python-ng-2.2.2.tar.gz", hash = "sha256:87ad3a766e2c0bec71d9b99bdd9e9c5cda62d96cfda61a8ca16510484d6ad7d4"}, + {file = "kafka_python_ng-2.2.2-py2.py3-none-any.whl", hash = "sha256:3fab1a03133fade1b6fd5367ff726d980e59031c4aaca9bf02c516840a4f8406"}, +] [package.extras] +boto = ["botocore"] crc32c = ["crc32c"] lz4 = ["lz4"] snappy = ["python-snappy"] zstd = ["zstandard"] -[package.source] -type = "git" -url = "https://github.com/dpkp/kafka-python.git" -reference = "HEAD" -resolved_reference = "a6d0579d3cadd3826dd364b01bc12a2173139abc" - [[package]] name = "packaging" version = "24.0" @@ -262,13 +273,13 @@ files = [ [[package]] name = "pluggy" -version = "1.4.0" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] @@ -441,9 +452,11 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = 
"*" packaging = "*" pluggy = ">=1.4,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -595,6 +608,17 @@ registry = ["bcrypt"] selenium = ["selenium"] weaviate = ["weaviate-client (>=4.5.4,<5.0.0)"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "typing-extensions" version = "4.11.0" @@ -702,7 +726,10 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[extras] +batch = ["pyspark"] + [metadata] lock-version = "2.0" -python-versions = "^3.12" -content-hash = "02f4246bf3bbb2546b674ba8698c3c49aff901734180e1ee0b4e038e089276d4" +python-versions = "^3.10" +content-hash = "73c7e1f23d313ff232e91e8547f16ffe7626fca8c17fc5367584546f9dc83fe1" diff --git a/pyproject.toml b/pyproject.toml index 54b06f2..145da2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,18 +6,25 @@ authors = ["Ravi Mula "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.12" +python = "^3.10" pycryptodome = "^3.20.0" psycopg2-binary = "^2.9.9" pyyaml = "^6.0.1" -kafka-python = {git = "https://github.com/dpkp/kafka-python.git"} +coverage = "^7.5.0" +kafka-python-ng = "^2.2.2" + +# A list of all of the optional dependencies, some of which are included in the +# below `extras`. They can be opted into by apps. 
+pyspark = { version = "^3.5.1", optional = true } + +[tool.poetry.extras] +batch = ["pyspark"] [tool.poetry.group.dev.dependencies] pytest = "^8.1.1" coverage = "^7.4.3" pyspark = "^3.5.1" testcontainers = {extras = ["kafka", "postgres"], version = "^4.4.0"} -kafka-python = "^2.0.2" [tool.poetry.group.batch.dependencies] pyspark = "^3.5.1" From 0bc6f9e544227bf1ba2b2889719dddf26bc50505 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Sun, 12 May 2024 10:48:18 +0530 Subject: [PATCH 04/13] Sanketika-Obsrv/issue-tracker#128: linting and code cleanup --- obsrv/common/__init__.py | 2 +- obsrv/common/exception.py | 8 +- obsrv/connector/__init__.py | 2 +- obsrv/connector/batch/__init__.py | 2 +- obsrv/connector/batch/obsrv_dataset.py | 37 +++-- obsrv/connector/batch/source.py | 180 +++++++++++++++++-------- obsrv/connector/metrics_collector.py | 17 ++- obsrv/connector/registry.py | 79 +++++++---- obsrv/job/batch/__init__.py | 2 +- obsrv/job/batch/utils.py | 55 ++++++-- obsrv/models/__init__.py | 2 +- obsrv/models/data_models.py | 6 +- obsrv/models/metric.py | 28 ++-- obsrv/utils/__init__.py | 2 +- obsrv/utils/config.py | 3 +- obsrv/utils/db_util.py | 9 +- obsrv/utils/encyption.py | 10 +- obsrv/utils/json_util.py | 5 +- obsrv/utils/logger.py | 3 +- obsrv/utils/time.py | 4 +- tests/batch_setup.py | 30 +++-- tests/config/config.template.yaml | 2 +- tests/create_tables.py | 27 ++-- tests/test_batch_connector.py | 26 +++- tests/test_conf.yaml | 2 +- tests/test_connector_registry.py | 19 ++- tests/test_encryption_utils.py | 13 +- tests/test_setup.py | 24 ---- 28 files changed, 381 insertions(+), 218 deletions(-) delete mode 100644 tests/test_setup.py diff --git a/obsrv/common/__init__.py b/obsrv/common/__init__.py index 9001a4a..1b8f04f 100644 --- a/obsrv/common/__init__.py +++ b/obsrv/common/__init__.py @@ -1 +1 @@ -from .exception import ObsrvException \ No newline at end of file +from .exception import ObsrvException diff --git a/obsrv/common/exception.py 
b/obsrv/common/exception.py index 228f09c..d504059 100644 --- a/obsrv/common/exception.py +++ b/obsrv/common/exception.py @@ -3,12 +3,16 @@ logger = LoggerController(__name__) + class ObsrvException(Exception): def __init__(self, error): self.error = error super().__init__(self.error.error_msg) - logger.exception(f"exception called from {self.__class__.__name__} with error {self.error.error_code} - {self.error.error_msg}") + logger.exception( + f"exception called from {self.__class__.__name__} with error {self.error.error_code} - {self.error.error_msg}" + ) + # class UnsupportedDataFormatException(ObsrvException): # def __init__(self, data_format): -# super().__init__(ErrorData("DATA_FORMAT_ERR", f"Unsupported data format {data_format}")) \ No newline at end of file +# super().__init__(ErrorData("DATA_FORMAT_ERR", f"Unsupported data format {data_format}")) diff --git a/obsrv/connector/__init__.py b/obsrv/connector/__init__.py index 3f0e86e..af0f587 100644 --- a/obsrv/connector/__init__.py +++ b/obsrv/connector/__init__.py @@ -1,2 +1,2 @@ from .metrics_collector import MetricsCollector -from .registry import ConnectorContext, ConnectorInstance \ No newline at end of file +from .registry import ConnectorContext, ConnectorInstance diff --git a/obsrv/connector/batch/__init__.py b/obsrv/connector/batch/__init__.py index cfabb80..ad9de37 100644 --- a/obsrv/connector/batch/__init__.py +++ b/obsrv/connector/batch/__init__.py @@ -1,2 +1,2 @@ from .obsrv_dataset import ObsrvDataset -from .source import ISourceConnector, SourceConnector \ No newline at end of file +from .source import ISourceConnector, SourceConnector diff --git a/obsrv/connector/batch/obsrv_dataset.py b/obsrv/connector/batch/obsrv_dataset.py index caee7dc..38b878d 100644 --- a/obsrv/connector/batch/obsrv_dataset.py +++ b/obsrv/connector/batch/obsrv_dataset.py @@ -7,6 +7,7 @@ logger = LoggerController(__name__) + class ObsrvDataset: def __init__(self, ds: DataFrame): self.ds = ds @@ -16,20 +17,26 @@ def 
__init__(self, ds: DataFrame): def filter_events(self, ctx, config): max_event_size = config.find("kafka.producer.max-request-size", 1000000) self.ds = self.ds.withColumn("_obsrv_tmp_size", length(to_json(struct("*")))) - self.invalid_events = self.ds.filter(self.ds._obsrv_tmp_size > max_event_size).drop("_obsrv_tmp_size") - self.valid_events = self.ds.filter(self.ds._obsrv_tmp_size <= max_event_size).drop("_obsrv_tmp_size") + self.invalid_events = self.ds.filter( + self.ds._obsrv_tmp_size > max_event_size + ).drop("_obsrv_tmp_size") + self.valid_events = self.ds.filter( + self.ds._obsrv_tmp_size <= max_event_size + ).drop("_obsrv_tmp_size") def append_obsrv_meta(self, ctx): addn_meta = False source_meta = [ StructField("connector", StringType(), True), - StructField("connectorInstance", StringType(), True) + StructField("connectorInstance", StringType(), True), ] if "_addn_source_meta" in self.ds.columns: addn_meta = True source_meta.append(StructField("_addn_source_meta", StringType(), True)) - addn_meta_data = self.ds.select("_addn_source_meta").collect()[0][0].replace('"', "'") + addn_meta_data = ( + self.ds.select("_addn_source_meta").collect()[0][0].replace('"', "'") + ) self.ds = self.ds.drop("_addn_source_meta") obsrv_meta_schema = StructType( @@ -38,7 +45,7 @@ def append_obsrv_meta(self, ctx): StructField("flags", StringType(), True), StructField("timespans", StringType(), True), StructField("error", StringType(), True), - StructField("source", StructType(source_meta), True) + StructField("source", StructType(source_meta), True), ] ) @@ -50,8 +57,8 @@ def append_obsrv_meta(self, ctx): "error": {}, "source": { "connector": ctx.connector_id, - "connectorInstance": ctx.connector_instance_id - } + "connectorInstance": ctx.connector_instance_id, + }, } if addn_meta: @@ -61,11 +68,15 @@ def append_obsrv_meta(self, ctx): self.ds = self.ds.withColumn("obsrv_meta", obsrv_meta_struct) def save_to_kafka(self, config, topic): - kafka_servers = 
config.find("kafka.bootstrap-servers", "localhost:9092") + kafka_servers = config.find("kafka.broker-servers", "localhost:9092") compression_type = config.find("kafka.producer.compression", "snappy") - self.valid_events.selectExpr("to_json(struct(*)) AS value").write.format("kafka") \ - .option("kafka.bootstrap.servers", kafka_servers) \ - .option("kafka.compression.type", compression_type) \ - .option("topic", topic)\ - .save() + self.valid_events.selectExpr("to_json(struct(*)) AS value").write.format( + "kafka" + ).option("kafka.bootstrap.servers", kafka_servers).option( + "kafka.compression.type", compression_type + ).option( + "topic", topic + ).save() + + # TODO: Handle invalid events - send to dead letter queue diff --git a/obsrv/connector/batch/source.py b/obsrv/connector/batch/source.py index 387f1f0..719c221 100644 --- a/obsrv/connector/batch/source.py +++ b/obsrv/connector/batch/source.py @@ -15,6 +15,7 @@ logger = LoggerController(__name__) + class ISourceConnector(ABC): @final @@ -23,15 +24,6 @@ def execute(self, ctx, connector_config, sc, metrics_collector) -> Any: return results - # if isinstance(results, DataFrame): - # return results - # # elif isinstance(results, Iterator): - # else: - # for result in results: - # yield result - # else: - # return results - @abstractmethod def get_spark_conf(self, connector_config) -> SparkConf: pass @@ -40,11 +32,16 @@ def get_spark_conf(self, connector_config) -> SparkConf: def process(self, sc, ctx, connector_config) -> Any: pass + class SourceConnector: @final - def get_connector_instance(connector_instance_id: Any, postgres_config: Any) -> ConnectorInstance: - return ConnectorRegistry.get_connector_instance(connector_instance_id, postgres_config) + def get_connector_instance( + connector_instance_id: Any, postgres_config: Any + ) -> ConnectorInstance: + return ConnectorRegistry.get_connector_instance( + connector_instance_id, postgres_config + ) @final def get_connector_config(connector_instance: 
ConnectorInstance) -> Dict[Any, Any]: @@ -52,29 +49,47 @@ def get_connector_config(connector_instance: ConnectorInstance) -> Dict[Any, Any @final def get_additional_config(spark_conf: SparkConf) -> SparkConf: - addn_jars = [ - 'org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1' - ] + addn_jars = ["org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1"] configured_jars = spark_conf.get("spark.jars.packages", "") if len(configured_jars): - spark_conf.set("spark.jars.packages", f"{configured_jars},{','.join(addn_jars)}") + spark_conf.set( + "spark.jars.packages", f"{configured_jars},{','.join(addn_jars)}" + ) else: - spark_conf.set("spark.jars.packages", ','.join(addn_jars)) + spark_conf.set("spark.jars.packages", ",".join(addn_jars)) return spark_conf @final - def get_spark_session(ctx: ConnectorContext, connector_config: Dict[Any, Any], spark_conf: SparkConf) -> SparkSession: + def get_spark_session( + ctx: ConnectorContext, connector_config: Dict[Any, Any], spark_conf: SparkConf + ) -> SparkSession: spark_conf = SourceConnector.get_additional_config(spark_conf) try: - sc = SparkSession.builder.appName(ctx.connector_id).config(conf=spark_conf).getOrCreate() + sc = ( + SparkSession.builder.appName(ctx.connector_id) + .config(conf=spark_conf) + .getOrCreate() + ) return sc except Exception as e: logger.exception(f"Error creating spark session: {str(e)}") @final - def process_connector(connector: ISourceConnector, ctx: ConnectorContext, connector_config: Dict[Any, Any], config: Dict[Any, Any], sc: SparkSession, metrics_collector: MetricsCollector) -> ExecutionMetric: + def process_connector( + connector: ISourceConnector, + ctx: ConnectorContext, + connector_config: Dict[Any, Any], + config: Dict[Any, Any], + sc: SparkSession, + metrics_collector: MetricsCollector, + ) -> ExecutionMetric: valid_records, failed_records, framework_exec_time = 0, 0, 0 - results = connector.execute(ctx=ctx, connector_config=connector_config, sc=sc, metrics_collector=metrics_collector) + results 
= connector.execute( + ctx=ctx, + connector_config=connector_config, + sc=sc, + metrics_collector=metrics_collector, + ) if isinstance(results, DataFrame): res = SourceConnector.process_result(results, ctx, config) @@ -90,16 +105,15 @@ def process_connector(connector: ISourceConnector, ctx: ConnectorContext, connec return ExecutionMetric( totalRecords=valid_records + failed_records, - failedRecords=failed_records, successRecords=valid_records, - connectorExecTime=0, frameworkExecTime=framework_exec_time, totalExecTime=0 + failedRecords=failed_records, + successRecords=valid_records, + connectorExecTime=0, + frameworkExecTime=framework_exec_time, + totalExecTime=0, ) def process_result(result, ctx, config): start_time = time.time() - if "dataset" in result.columns: - result = result.drop("dataset") - if "obsrv_meta" in result.columns: - result = result.drop("obsrv_meta") dataset = ObsrvDataset(result) dataset.append_obsrv_meta(ctx) @@ -114,14 +128,15 @@ def process_result(result, ctx, config): return (valid_records_count, failed_records_count, end_time - start_time) - @final def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): start_time = time.time() config = Config(config_file_path) connector_instance_id = config.find("connector_instance_id") - connector_instance = SourceConnector.get_connector_instance(connector_instance_id, config.find("postgres")) + connector_instance = SourceConnector.get_connector_instance( + connector_instance_id, config.find("postgres") + ) if connector_instance is None: raise Exception("Connector instance not found") @@ -131,16 +146,25 @@ def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): ctx.building_block = config.find("building-block", None) ctx.env = config.find("env", None) connector_config = SourceConnector.get_connector_config(connector_instance) - if 'is_encrypted' in connector_config and connector_config['is_encrypted']: - encryption_util = 
EncryptionUtil(config.find("obsrv_encryption_key")) - connector_config = json.loads(encryption_util.decrypt(connector_config['connector_config'])) + # if 'is_encrypted' in connector_config and connector_config['is_encrypted']: + encryption_util = EncryptionUtil(config.find("obsrv_encryption_key")) + connector_config = json.loads(encryption_util.decrypt(connector_config)) metrics_collector = MetricsCollector(ctx) - sc = SourceConnector.get_spark_session(ctx, connector_config, connector.get_spark_conf(connector_config)) + sc = SourceConnector.get_spark_session( + ctx, connector_config, connector.get_spark_conf(connector_config) + ) connector_processing_start = time.time() try: - execution_metric = SourceConnector.process_connector(connector=connector, ctx=ctx, connector_config=connector_config, config=config, sc=sc, metrics_collector=metrics_collector) + execution_metric = SourceConnector.process_connector( + connector=connector, + ctx=ctx, + connector_config=connector_config, + config=config, + sc=sc, + metrics_collector=metrics_collector, + ) end_time = time.time() metric_event = ExecutionMetric( @@ -148,42 +172,82 @@ def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): failedRecords=execution_metric.failedRecords, successRecords=execution_metric.successRecords, connectorExecTime=end_time - connector_processing_start, - frameworkExecTime=execution_metric.frameworkExecTime + end_time - start_time, - totalExecTime=end_time - start_time + frameworkExecTime=execution_metric.frameworkExecTime + + end_time + - start_time, + totalExecTime=end_time - start_time, ) metrics_collector.collect(metric=metric_event.to_json()) except Exception as e: logger.exception(f"error processing connector: {str(e)}") - ObsrvException(ErrorData("CONNECTOR_PROCESS_ERR", f"error processing connector: {str(e)}")) + ObsrvException( + ErrorData( + "CONNECTOR_PROCESS_ERR", f"error processing connector: {str(e)}" + ) + ) finally: - kafka_servers = 
config.find("kafka.bootstrap-servers", "localhost:9092") + kafka_servers = config.find("kafka.broker-servers", "localhost:9092") compression_type = config.find("kafka.producer.compression", "snappy") - sc.createDataFrame(metrics_collector.to_seq(), SourceConnector.get_metrics_schema()) \ - .selectExpr("to_json(struct(*)) AS value").write.format("kafka") \ - .option("kafka.bootstrap.servers", kafka_servers) \ - .option("kafka.compression.type", compression_type) \ - .option("topic", config.find("kafka.connector-metrics-topic")) \ - .save() + sc.createDataFrame( + metrics_collector.to_seq(), SourceConnector.get_metrics_schema() + ).selectExpr("to_json(struct(*)) AS value").write.format("kafka").option( + "kafka.bootstrap.servers", kafka_servers + ).option( + "kafka.compression.type", compression_type + ).option( + "topic", config.find("kafka.connector-metrics-topic") + ).save() sc.stop() def get_metrics_schema(): - from pyspark.sql.types import StructType, StructField, StringType, LongType, MapType, DoubleType, ArrayType - - schema = StructType([ - StructField("actor", MapType(StringType(), StringType()), nullable=False), - StructField("context", MapType(StringType(), MapType(StringType(), StringType())), nullable=False), - StructField("edata", StructType([ - StructField("labels", ArrayType(MapType(StringType(), StringType())), nullable=False), - StructField("metric", MapType(StringType(), DoubleType()), nullable=False) - ]), nullable=False), - StructField("eid", StringType(), nullable=False), - StructField("ets", LongType(), nullable=False), - StructField("mid", StringType(), nullable=False), - StructField("object", MapType(StringType(), StringType()), nullable=False) - ]) + from pyspark.sql.types import ( + StructType, + StructField, + StringType, + LongType, + MapType, + DoubleType, + ArrayType, + ) - return schema + schema = StructType( + [ + StructField( + "actor", MapType(StringType(), StringType()), nullable=False + ), + StructField( + "context", + 
MapType(StringType(), MapType(StringType(), StringType())), + nullable=False, + ), + StructField( + "edata", + StructType( + [ + StructField( + "labels", + ArrayType(MapType(StringType(), StringType())), + nullable=False, + ), + StructField( + "metric", + MapType(StringType(), DoubleType()), + nullable=False, + ), + ] + ), + nullable=False, + ), + StructField("eid", StringType(), nullable=False), + StructField("ets", LongType(), nullable=False), + StructField("mid", StringType(), nullable=False), + StructField( + "object", MapType(StringType(), StringType()), nullable=False + ), + ] + ) + return schema diff --git a/obsrv/connector/metrics_collector.py b/obsrv/connector/metrics_collector.py index 24b54f6..9f2e29a 100644 --- a/obsrv/connector/metrics_collector.py +++ b/obsrv/connector/metrics_collector.py @@ -3,15 +3,18 @@ from typing import List, Dict from obsrv.models import EventID, Metric, MetricContext, MetricData + class MetricsCollector: def __init__(self, ctx): self.metric_labels = [ {"key": "type", "value": "Connector"}, {"key": "job", "value": ctx.connector_id}, {"key": "instance", "value": ctx.connector_instance_id}, - {"key": "dataset", "value": ctx.dataset_id} + {"key": "dataset", "value": ctx.dataset_id}, ] - self.metric_context = MetricContext(pdata = {"id": "Connector", "pid": ctx.connector_id}) + self.metric_context = MetricContext( + pdata={"id": "Connector", "pid": ctx.connector_id} + ) self.metric_actor = {"id": ctx.connector_id, "type": "SYSTEM"} self.metric_object = {"id": ctx.dataset_id, "type": "Dataset"} @@ -25,12 +28,16 @@ def collect(self, metric, value=None, addn_labels=[]): def generate(self, metric_map: Dict, addn_labels: List): return Metric( - eid=EventID.METRIC.value, ets=int(time.time() * 1000), mid=str(uuid.uuid4()), + eid=EventID.METRIC.value, + ets=int(time.time() * 1000), + mid=str(uuid.uuid4()), actor=self.metric_actor, context=self.metric_context, object=self.metric_object, - edata=MetricData(metric=metric_map, 
labels=self.metric_labels+addn_labels) + edata=MetricData( + metric=metric_map, labels=self.metric_labels + addn_labels + ), ) def to_seq(self): - return [metric.to_json() for metric in self.metrics] \ No newline at end of file + return [metric.to_json() for metric in self.metrics] diff --git a/obsrv/connector/registry.py b/obsrv/connector/registry.py index 144fedc..2daa912 100644 --- a/obsrv/connector/registry.py +++ b/obsrv/connector/registry.py @@ -16,8 +16,9 @@ class ConnectorContext: entry_topic: Optional[str] = None building_block: Optional[str] = None env: Optional[str] = None - state: Optional['ConnectorState'] = None - stats: Optional['ConnectorStats'] = None + state: Optional["ConnectorState"] = None + stats: Optional["ConnectorStats"] = None + @dataclass class ConnectorInstance: @@ -26,6 +27,7 @@ class ConnectorInstance: operations_config: str status: str + class ConnectorState: def __init__(self, postgres_config, connector_instance_id, state_json=None): self.connector_instance_id = connector_instance_id @@ -49,9 +51,16 @@ def to_json(self): # @staticmethod def save_state(self): - count = ConnectorRegistry.update_connector_state(self.connector_instance_id, self.postgres_config, self.to_json()) + count = ConnectorRegistry.update_connector_state( + self.connector_instance_id, self.postgres_config, self.to_json() + ) if count != 1: - raise ObsrvException(ErrorData("CONN_STATE_SAVE_FAILED", "Unable to save the connector state")) + raise ObsrvException( + ErrorData( + "CONN_STATE_SAVE_FAILED", "Unable to save the connector state" + ) + ) + class ConnectorStats: def __init__(self, postgres_config, connector_instance_id, stats_json=None): @@ -72,9 +81,16 @@ def to_json(self): return json.dumps(self.stats, default=str) def save_stats(self): - upd_count = ConnectorRegistry.update_connector_stats(self.connector_instance_id, self.postgres_config, self.to_json()) + upd_count = ConnectorRegistry.update_connector_stats( + self.connector_instance_id, 
self.postgres_config, self.to_json() + ) if upd_count != 1: - raise ObsrvException(ErrorData("CONN_STATS_SAVE_FAILED", "Unable to save the connector stats")) + raise ObsrvException( + ErrorData( + "CONN_STATS_SAVE_FAILED", "Unable to save the connector stats" + ) + ) + class ConnectorRegistry: @staticmethod @@ -85,7 +101,9 @@ def get_connector_instances(connector_id, postgres_config): FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id WHERE ci.connector_id = '{}' AND d.status = 'Live' AND ci.status = 'Live' - """.format(connector_id) + """.format( + connector_id + ) result = postgres_connect.execute_select_all(query) return [parse_connector_instance(row, postgres_config) for row in result] @@ -97,7 +115,9 @@ def get_connector_instance(connector_instance_id, postgres_config): FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id WHERE ci.id = '{}' AND d.status = 'Live' AND ci.status = 'Live' - """.format(connector_instance_id) + """.format( + connector_instance_id + ) result = postgres_connect.execute_select_one(query) return parse_connector_instance(result, postgres_config) if result else None @@ -106,7 +126,9 @@ def update_connector_state(connector_instance_id, postgres_config, state): postgres_connect = PostgresConnect(postgres_config) query = """ UPDATE connector_instances SET connector_state = '{}' WHERE id = '{}' - """.format(state, connector_instance_id) + """.format( + state, connector_instance_id + ) return postgres_connect.execute_upsert(query) @staticmethod @@ -114,22 +136,25 @@ def update_connector_stats(connector_instance_id, postgres_config, stats): postgres_connect = PostgresConnect(postgres_config) query = """ UPDATE connector_instances SET connector_stats = '{}' WHERE id = '{}' - """.format(stats, connector_instance_id) + """.format( + stats, connector_instance_id + ) return postgres_connect.execute_upsert(query) + def parse_connector_instance(rs, postgres_config) -> ConnectorInstance: - id = rs['id'] - 
dataset_id = rs['dataset_id'] - connector_id = rs['connector_id'] - connector_type = rs['connector_type'] - connector_config = rs['connector_config'] - # data_format = connector_config['fileFormat']['type'] - operations_config = rs['operations_config'] - status = rs['status'] - dataset_config = rs['dataset_config'] - connector_state = rs.get('connector_state', {}) - connector_stats = rs.get('connector_stats', {}) - entry_topic = dataset_config.get('entry_topic', 'dev.ingest') + id = rs["id"] + dataset_id = rs["dataset_id"] + connector_id = rs["connector_id"] + connector_type = rs["connector_type"] + connector_config = rs["connector_config"] + data_format = rs["data_format"] + operations_config = rs["operations_config"] + status = rs["status"] + dataset_config = rs["dataset_config"] + connector_state = rs.get("connector_state", {}) + connector_stats = rs.get("connector_stats", {}) + entry_topic = dataset_config.get("entry_topic", "dev.ingest") return ConnectorInstance( connector_context=ConnectorContext( @@ -137,12 +162,12 @@ def parse_connector_instance(rs, postgres_config) -> ConnectorInstance: dataset_id=dataset_id, connector_instance_id=id, connector_type=connector_type, - # data_format=data_format, + data_format=data_format, entry_topic=entry_topic, - state = ConnectorState(postgres_config, id, connector_state), - stats = ConnectorStats(postgres_config, id, connector_stats) + state=ConnectorState(postgres_config, id, connector_state), + stats=ConnectorStats(postgres_config, id, connector_stats), ), connector_config=connector_config, operations_config=operations_config, - status=status - ) \ No newline at end of file + status=status, + ) diff --git a/obsrv/job/batch/__init__.py b/obsrv/job/batch/__init__.py index 185dc3a..d2719b5 100644 --- a/obsrv/job/batch/__init__.py +++ b/obsrv/job/batch/__init__.py @@ -1 +1 @@ -from .utils import get_base_conf \ No newline at end of file +from .utils import get_base_conf diff --git a/obsrv/job/batch/utils.py 
b/obsrv/job/batch/utils.py index 7efefa3..edc4b61 100644 --- a/obsrv/job/batch/utils.py +++ b/obsrv/job/batch/utils.py @@ -1,29 +1,56 @@ from pyspark.conf import SparkConf + def get_base_conf() -> SparkConf: conf = SparkConf() # conf.setMaster("local") # Set master as local for testing # conf.set("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1") # Include SQL Kafka to be able to write to kafka - conf.set("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2") # Set output committer algorithm version + conf.set( + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2" + ) # Set output committer algorithm version conf.set("spark.speculation", "false") # Disable speculation - conf.set("spark.hadoop.mapreduce.map.speculative", "false") # Disable map speculative execution - conf.set("spark.hadoop.mapreduce.reduce.speculative", "false") # Disable reduce speculative execution - conf.set("spark.sql.parquet.filterPushdown", "true") # Enable Parquet filter pushdown - conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic") # Set partition overwrite mode - conf.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") # Enable recursive directory listing - conf.set("spark.sql.execution.arrow.pyspark.enabled", "true") # Enable Apache Arrow optimization - conf.set("spark.executor.heartbeatInterval", "60s") # Set executor heartbeat interval + conf.set( + "spark.hadoop.mapreduce.map.speculative", "false" + ) # Disable map speculative execution + conf.set( + "spark.hadoop.mapreduce.reduce.speculative", "false" + ) # Disable reduce speculative execution + conf.set( + "spark.sql.parquet.filterPushdown", "true" + ) # Enable Parquet filter pushdown + conf.set( + "spark.sql.sources.partitionOverwriteMode", "dynamic" + ) # Set partition overwrite mode + conf.set( + "spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true" + ) # Enable recursive directory listing + conf.set( + 
"spark.sql.execution.arrow.pyspark.enabled", "true" + ) # Enable Apache Arrow optimization + conf.set( + "spark.executor.heartbeatInterval", "60s" + ) # Set executor heartbeat interval conf.set("spark.network.timeout", "600s") # Set network timeout conf.set("spark.sql.shuffle.partitions", "200") # Set shuffle partitions conf.set("spark.default.parallelism", "200") # Set default parallelism conf.set("spark.sql.session.timeZone", "UTC") # Set timezone - conf.set("spark.sql.catalogImplementation", "hive") # Use Hive catalog implementation - conf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false") # Disable partition column type inference - conf.set("spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored", "true") # Ignore cleanup failures - conf.set("spark.hadoop.parquet.enable.summary-metadata", "false") # Disable summary metadata for Parquet + conf.set( + "spark.sql.catalogImplementation", "hive" + ) # Use Hive catalog implementation + conf.set( + "spark.sql.sources.partitionColumnTypeInference.enabled", "false" + ) # Disable partition column type inference + conf.set( + "spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored", "true" + ) # Ignore cleanup failures + conf.set( + "spark.hadoop.parquet.enable.summary-metadata", "false" + ) # Disable summary metadata for Parquet conf.set("spark.sql.sources.ignoreCorruptFiles", "true") # Ignore corrupt files conf.set("spark.sql.adaptive.enabled", "true") # Enable adaptive query execution - conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY") # Set time parser policy to LEGACY + conf.set( + "spark.sql.legacy.timeParserPolicy", "LEGACY" + ) # Set time parser policy to LEGACY - return conf \ No newline at end of file + return conf diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 249f2f9..61292a1 100644 --- a/obsrv/models/__init__.py +++ b/obsrv/models/__init__.py @@ -1,2 +1,2 @@ from .data_models import ErrorData, EventID, StatusCode -from .metric import 
Metric, MetricContext, MetricData, ExecutionMetric \ No newline at end of file +from .metric import Metric, MetricContext, MetricData, ExecutionMetric diff --git a/obsrv/models/data_models.py b/obsrv/models/data_models.py index dd0dd41..727deff 100644 --- a/obsrv/models/data_models.py +++ b/obsrv/models/data_models.py @@ -1,11 +1,14 @@ from enum import Enum from dataclasses import dataclass + # from typing import Optional, Dict, Any + class EventID(Enum): LOG = "LOG" METRIC = "METRIC" + class StatusCode(Enum): SUCCESS = "success" FAILED = "failed" @@ -16,6 +19,7 @@ class ErrorData: error_code: str error_msg: str + # @dataclass # class ContextData: # connector_id: str @@ -43,4 +47,4 @@ class ErrorData: # etype: EventID # ctx: ContextData # data: EData -# ets: int \ No newline at end of file +# ets: int diff --git a/obsrv/models/metric.py b/obsrv/models/metric.py index d5523e6..a5a9f7d 100644 --- a/obsrv/models/metric.py +++ b/obsrv/models/metric.py @@ -16,17 +16,17 @@ def to_json(self): "actor": self.actor, "context": self.context.to_json(), "object": self.object, - "edata": self.edata.to_json() + "edata": self.edata.to_json(), } + class MetricContext: def __init__(self, pdata): self.pdata = pdata def to_json(self): - return { - "pdata": self.pdata - } + return {"pdata": self.pdata} + class MetricData: def __init__(self, metric, labels): @@ -34,13 +34,19 @@ def __init__(self, metric, labels): self.labels = labels def to_json(self): - return { - "metric": self.metric, - "labels": self.labels - } + return {"metric": self.metric, "labels": self.labels} + class ExecutionMetric: - def __init__(self, totalRecords, failedRecords, successRecords, connectorExecTime, frameworkExecTime, totalExecTime): + def __init__( + self, + totalRecords, + failedRecords, + successRecords, + connectorExecTime, + frameworkExecTime, + totalExecTime, + ): self.totalRecords = totalRecords self.failedRecords = failedRecords self.successRecords = successRecords @@ -58,5 +64,5 @@ def 
to_json(self): "success_records_count": self.successRecords, "total_exec_time_ms": self.totalExecTime, "connector_exec_time_ms": self.connectorExecTime, - "fw_exec_time_ms": self.frameworkExecTime - } \ No newline at end of file + "fw_exec_time_ms": self.frameworkExecTime, + } diff --git a/obsrv/utils/__init__.py b/obsrv/utils/__init__.py index 9e35f04..a82ac34 100644 --- a/obsrv/utils/__init__.py +++ b/obsrv/utils/__init__.py @@ -2,4 +2,4 @@ from .encyption import EncryptionUtil from .db_util import PostgresConnect from .time import time_it -from .logger import LoggerController \ No newline at end of file +from .logger import LoggerController diff --git a/obsrv/utils/config.py b/obsrv/utils/config.py index 7fc5e3e..db43316 100644 --- a/obsrv/utils/config.py +++ b/obsrv/utils/config.py @@ -6,6 +6,7 @@ logger = LoggerController(__name__) + class Config: def __init__(self, config_file_path): with open(config_file_path) as config_file: @@ -17,4 +18,4 @@ def find(self, path, default=None): return element_value except KeyError: logger.exception("key `%s` not found in config", path) - return default \ No newline at end of file + return default diff --git a/obsrv/utils/db_util.py b/obsrv/utils/db_util.py index 7fb2e0c..30c7784 100644 --- a/obsrv/utils/db_util.py +++ b/obsrv/utils/db_util.py @@ -1,6 +1,7 @@ import psycopg2 import psycopg2.extras + class PostgresConnect: def __init__(self, config): self.config = config @@ -13,8 +14,10 @@ def connect(self): database = self.config.get("dbname") db_connection = psycopg2.connect( database=database, - host=db_host, port=db_port, - user=db_user, password=db_password + host=db_host, + port=db_port, + user=db_user, + password=db_password, ) db_connection.autocommit = True return db_connection @@ -41,4 +44,4 @@ def execute_upsert(self, sql): cursor.execute(sql) record_count = cursor.rowcount db_connection.close() - return record_count \ No newline at end of file + return record_count diff --git a/obsrv/utils/encyption.py 
b/obsrv/utils/encyption.py index e43ba9f..98152fa 100644 --- a/obsrv/utils/encyption.py +++ b/obsrv/utils/encyption.py @@ -2,21 +2,21 @@ from Crypto.Cipher import AES from Crypto.Util.Padding import pad, unpad + class EncryptionUtil: def __init__(self, encryption_key): self.algorithm = AES - self.key = encryption_key.encode('utf-8') + self.key = encryption_key.encode("utf-8") self.mode = AES.MODE_ECB self.block_size = AES.block_size def encrypt(self, value: str): cipher = self.algorithm.new(self.key, self.mode) - padded_value = pad(value.encode('utf-8'), self.block_size) - return b64encode(cipher.encrypt(padded_value)).decode('utf-8') + padded_value = pad(value.encode("utf-8"), self.block_size) + return b64encode(cipher.encrypt(padded_value)).decode("utf-8") def decrypt(self, value: str) -> str: cipher = self.algorithm.new(self.key, self.mode) decrypted_value64 = b64decode(value) decrypted_byte_value = unpad(cipher.decrypt(decrypted_value64), self.block_size) - return decrypted_byte_value.decode('utf-8') - + return decrypted_byte_value.decode("utf-8") diff --git a/obsrv/utils/json_util.py b/obsrv/utils/json_util.py index 0c72f5e..ebadea3 100644 --- a/obsrv/utils/json_util.py +++ b/obsrv/utils/json_util.py @@ -1,6 +1,7 @@ import json -class JSONUtil: # pragma: no cover + +class JSONUtil: # pragma: no cover @staticmethod def serialize(obj): if isinstance(obj, str): @@ -23,4 +24,4 @@ def get_json_type(json_str): else: return "NOT_A_JSON" except json.JSONDecodeError: - return "NOT_A_JSON" \ No newline at end of file + return "NOT_A_JSON" diff --git a/obsrv/utils/logger.py b/obsrv/utils/logger.py index 37fa577..58a820b 100644 --- a/obsrv/utils/logger.py +++ b/obsrv/utils/logger.py @@ -1,6 +1,7 @@ import logging from sys import stdout + class LoggerController(logging.Logger): def __init__(self, name): super().__init__(name) @@ -15,4 +16,4 @@ def __init__(self, name): self.setLevel(logging.INFO) def handle(self, record): - super().handle(record) \ No newline at end of 
file + super().handle(record) diff --git a/obsrv/utils/time.py b/obsrv/utils/time.py index 3c7b287..7a372ba 100644 --- a/obsrv/utils/time.py +++ b/obsrv/utils/time.py @@ -1,6 +1,7 @@ import time from typing import Iterator + def time_it(func): def wrapper(*args, **kwargs): start_time = time.time() @@ -12,4 +13,5 @@ def wrapper(*args, **kwargs): yield elapsed_time, result else: return elapsed_time, result - return wrapper \ No newline at end of file + + return wrapper diff --git a/tests/batch_setup.py b/tests/batch_setup.py index 639b701..c660145 100644 --- a/tests/batch_setup.py +++ b/tests/batch_setup.py @@ -5,9 +5,11 @@ import yaml from tests.create_tables import create_tables + # import psycopg2 -@pytest.fixture(scope='session', autouse=True) + +@pytest.fixture(scope="session", autouse=True) def setup_obsrv_database(request): postgres = PostgresContainer("postgres:latest") kafka = KafkaContainer("confluentinc/cp-kafka:latest") @@ -15,19 +17,23 @@ def setup_obsrv_database(request): postgres.start() kafka.start() - with open(os.path.join(os.path.dirname(__file__), 'config/config.template.yaml')) as config_file: + with open( + os.path.join(os.path.dirname(__file__), "config/config.template.yaml") + ) as config_file: config = yaml.safe_load(config_file) - config['connector-instance-id'] = 'test.new-york-taxi-data.1' + config["connector-instance-id"] = "test.new-york-taxi-data.1" - config['postgres']['host'] = postgres.get_container_host_ip() - config['postgres']['port'] = postgres.get_exposed_port(5432) - config['postgres']['user'] = postgres.username - config['postgres']['password'] = postgres.password - config['postgres']['dbname'] = postgres.dbname - config['kafka']['bootstrap-servers'] = kafka.get_bootstrap_server() + config["postgres"]["host"] = postgres.get_container_host_ip() + config["postgres"]["port"] = postgres.get_exposed_port(5432) + config["postgres"]["user"] = postgres.username + config["postgres"]["password"] = postgres.password + 
config["postgres"]["dbname"] = postgres.dbname + config["kafka"]["broker-servers"] = kafka.get_bootstrap_server() - with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml'), 'w') as config_file: + with open( + os.path.join(os.path.dirname(__file__), "config/config.yaml"), "w" + ) as config_file: yaml.dump(config, config_file) create_tables(config) @@ -37,9 +43,9 @@ def remove_container(): postgres.stop() kafka.stop() try: - os.remove(os.path.join(os.path.dirname(__file__), 'config/config.yaml')) + os.remove(os.path.join(os.path.dirname(__file__), "config/config.yaml")) except FileNotFoundError: print("config file already removed") pass - request.addfinalizer(remove_container) \ No newline at end of file + request.addfinalizer(remove_container) diff --git a/tests/config/config.template.yaml b/tests/config/config.template.yaml index 829323f..d39a558 100644 --- a/tests/config/config.template.yaml +++ b/tests/config/config.template.yaml @@ -6,7 +6,7 @@ postgres: port: 5432 kafka: - bootstrap-servers: localhost:9092 + broker-servers: localhost:9092 telemetry-topic: test.telemetry connector-metrics-topic: test.metrics producer: diff --git a/tests/create_tables.py b/tests/create_tables.py index ac167ff..f1b235a 100644 --- a/tests/create_tables.py +++ b/tests/create_tables.py @@ -4,8 +4,9 @@ import json from obsrv.utils import EncryptionUtil + def create_tables(config): - enc = EncryptionUtil(config['obsrv_encryption_key']) + enc = EncryptionUtil(config["obsrv_encryption_key"]) datasets = """ CREATE TABLE IF NOT EXISTS datasets ( @@ -89,8 +90,11 @@ def create_tables(config): ('test.1', '1', 'source', 'object', 'test_reader', 'test_reader', 'Python', 'Apache 2.0', 'ravi@obsrv.ai', 'http://localhost', 'Live', 'SYSTEM', 'SYSTEM', now()); """ - connector_config = json.dumps({"type":"local"}) - enc_config = {"is_encrypted": True, "connector_config": enc.encrypt(connector_config)} + connector_config = json.dumps({"type": "local"}) + enc_config = { + 
"is_encrypted": True, + "connector_config": enc.encrypt(connector_config), + } ins_ci = """ INSERT INTO connector_instances (id, dataset_id, connector_id, connector_type, connector_config, operations_config, status, connector_state, connector_stats, created_by, updated_by, created_date, updated_date, published_date) VALUES @@ -98,15 +102,16 @@ def create_tables(config): ); """ - - with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml'), 'r') as config_file: + with open( + os.path.join(os.path.dirname(__file__), "config/config.yaml"), "r" + ) as config_file: config = yaml.safe_load(config_file) conn = psycopg2.connect( - host=config['postgres']['host'], - port=config['postgres']['port'], - user=config['postgres']['user'], - password=config['postgres']['password'], - dbname=config['postgres']['dbname'] + host=config["postgres"]["host"], + port=config["postgres"]["port"], + user=config["postgres"]["user"], + password=config["postgres"]["password"], + dbname=config["postgres"]["dbname"], ) cur = conn.cursor() @@ -120,4 +125,4 @@ def create_tables(config): cur.execute(ins_ci, (json.dumps(enc_config),)) conn.commit() - conn.close() \ No newline at end of file + conn.close() diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py index 67a6b5b..607c722 100644 --- a/tests/test_batch_connector.py +++ b/tests/test_batch_connector.py @@ -14,6 +14,7 @@ from obsrv.connector.batch import ISourceConnector, SourceConnector from obsrv.connector import ConnectorContext from obsrv.connector import MetricsCollector + # from obsrv.models import ErrorData, StatusCode from tests.create_tables import create_tables @@ -21,11 +22,17 @@ class TestSource(ISourceConnector): - def process(self, sc: SparkSession, ctx: ConnectorContext, connector_config: Dict[Any, Any], metrics_collector: MetricsCollector) -> DataFrame: - df = sc.read.format("json").load('tests/sample_data/nyt_data_100.json.gz') + def process( + self, + sc: SparkSession, + ctx: ConnectorContext, 
+ connector_config: Dict[Any, Any], + metrics_collector: MetricsCollector, + ) -> DataFrame: + df = sc.read.format("json").load("tests/sample_data/nyt_data_100.json.gz") yield df - df1 = sc.read.format("json").load('tests/sample_data/nyt_data_100.json') + df1 = sc.read.format("json").load("tests/sample_data/nyt_data_100.json") yield df1 @@ -33,21 +40,26 @@ def get_spark_conf(self, connector_config) -> SparkConf: conf = get_base_conf() return conf + @pytest.mark.usefixtures("setup_obsrv_database") class TestBatchConnector(unittest.TestCase): def test_source_connector(self): connector = TestSource() - config_file_path = os.path.join(os.path.dirname(__file__), 'config/config.yaml') + config_file_path = os.path.join(os.path.dirname(__file__), "config/config.yaml") config = yaml.safe_load(open(config_file_path)) self.assertEqual(os.path.exists(config_file_path), True) - test_raw_topic = 'test.ingest' - test_metrics_topic = 'test.metrics' + test_raw_topic = "test.ingest" + test_metrics_topic = "test.metrics" - kafka_consumer = KafkaConsumer(bootstrap_servers=config['kafka']['bootstrap-servers'], group_id='test-group', enable_auto_commit=True) + kafka_consumer = KafkaConsumer( + bootstrap_servers=config["kafka"]["broker-servers"], + group_id="test-group", + enable_auto_commit=True, + ) trt_consumer = TopicPartition(test_raw_topic, 0) tmt_consumer = TopicPartition(test_metrics_topic, 0) diff --git a/tests/test_conf.yaml b/tests/test_conf.yaml index 829323f..d39a558 100644 --- a/tests/test_conf.yaml +++ b/tests/test_conf.yaml @@ -6,7 +6,7 @@ postgres: port: 5432 kafka: - bootstrap-servers: localhost:9092 + broker-servers: localhost:9092 telemetry-topic: test.telemetry connector-metrics-topic: test.metrics producer: diff --git a/tests/test_connector_registry.py b/tests/test_connector_registry.py index 75a5da5..37daa5a 100644 --- a/tests/test_connector_registry.py +++ b/tests/test_connector_registry.py @@ -6,22 +6,29 @@ from tests.batch_setup import setup_obsrv_database + 
@pytest.mark.usefixtures("setup_obsrv_database") class TestConnectorRegistry(unittest.TestCase): def setUp(self) -> None: - self.connector_id = 'test.1' - self.connector_instance_id = 'test.new-york-taxi-data.1' + self.connector_id = "test.1" + self.connector_instance_id = "test.new-york-taxi-data.1" self.connector_registry = ConnectorRegistry() - with open(os.path.join(os.path.dirname(__file__), 'config/config.yaml')) as config_file: + with open( + os.path.join(os.path.dirname(__file__), "config/config.yaml") + ) as config_file: config = yaml.safe_load(config_file) - self.postgres_config = config['postgres'] + self.postgres_config = config["postgres"] def test_get_connector_instances(self): - connector_instances = self.connector_registry.get_connector_instances(self.connector_id, self.postgres_config) + connector_instances = self.connector_registry.get_connector_instances( + self.connector_id, self.postgres_config + ) self.assertIsInstance(connector_instances, list) self.assertEqual(len(connector_instances), 1) def test_get_connector_instance(self): - connector_instance = ConnectorRegistry.get_connector_instance(self.connector_instance_id, self.postgres_config) + connector_instance = ConnectorRegistry.get_connector_instance( + self.connector_instance_id, self.postgres_config + ) self.assertIsInstance(connector_instance, ConnectorInstance) diff --git a/tests/test_encryption_utils.py b/tests/test_encryption_utils.py index d9bf0d8..9d66dd0 100644 --- a/tests/test_encryption_utils.py +++ b/tests/test_encryption_utils.py @@ -1,26 +1,27 @@ import unittest from obsrv.utils import EncryptionUtil + class TestEncryptionUtil(unittest.TestCase): def setUp(self): - self.encryption_key = '5Gw743MySPvkcobvtVQoFJ0tUqAZ8TUw' + self.encryption_key = "5Gw743MySPvkcobvtVQoFJ0tUqAZ8TUw" self.encryption_util = EncryptionUtil(self.encryption_key) def test_encrypt(self): - plaintext = 'Hello, World!' + plaintext = "Hello, World!" 
encrypted_text = self.encryption_util.encrypt(plaintext) - self.assertEqual(encrypted_text, 'tz3mCbuoi8dfMSuIPngERg==') + self.assertEqual(encrypted_text, "tz3mCbuoi8dfMSuIPngERg==") self.assertNotEqual(plaintext, encrypted_text) def test_decrypt(self): - plaintext = 'Hello, World!' + plaintext = "Hello, World!" encrypted_text = self.encryption_util.encrypt(plaintext) decrypted_text = self.encryption_util.decrypt(encrypted_text) self.assertEqual(plaintext, decrypted_text) def test_decrypt_wrong_key(self): - plaintext = 'Hello, World!' + plaintext = "Hello, World!" encrypted_text = self.encryption_util.encrypt(plaintext) - wrong_key_encryption_util = EncryptionUtil('ozfS4yogdS8opAsIO7bhPc5jkwoJ8wUy') + wrong_key_encryption_util = EncryptionUtil("ozfS4yogdS8opAsIO7bhPc5jkwoJ8wUy") with self.assertRaises(Exception): wrong_key_encryption_util.decrypt(encrypted_text) diff --git a/tests/test_setup.py b/tests/test_setup.py deleted file mode 100644 index f11fdb0..0000000 --- a/tests/test_setup.py +++ /dev/null @@ -1,24 +0,0 @@ -# import os -# import pytest -from testcontainers.postgres import PostgresContainer -from testcontainers.kafka import KafkaContainer -import yaml -# import psycopg2 - - -# def test_create_dataset(): -# with open(os.path.join(os.path.dirname(__file__), 'config.yaml'), 'r') as config_file: -# config = yaml.safe_load(config_file) -# conn = psycopg2.connect( -# host=config['postgres']['host'], -# port=config['postgres']['port'], -# user=config['postgres']['user'], -# password=config['postgres']['password'], -# dbname=config['postgres']['dbname'] -# ) - -# cur = conn.cursor() -# cur.execute("SELECT * FROM public.datasets;") -# result = cur.fetchone() - -# assert result[0] == 'new-york-taxi-data' \ No newline at end of file From 92ce46fc40f4d84c060efb732854e513dcd3bae7 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Sun, 12 May 2024 10:51:36 +0530 Subject: [PATCH 05/13] Sanketika-Obsrv/issue-tracker#128: organize imports --- 
obsrv/connector/batch/obsrv_dataset.py | 6 ++++-- obsrv/connector/batch/source.py | 28 +++++++++++++------------- obsrv/connector/metrics_collector.py | 5 +++-- obsrv/connector/registry.py | 1 + obsrv/models/__init__.py | 2 +- obsrv/models/data_models.py | 2 +- obsrv/utils/__init__.py | 4 ++-- obsrv/utils/config.py | 6 ++++-- obsrv/utils/encyption.py | 3 ++- tests/batch_setup.py | 5 +++-- tests/create_tables.py | 6 ++++-- tests/test_batch_connector.py | 22 +++++++++----------- tests/test_connector_registry.py | 5 +++-- tests/test_encryption_utils.py | 1 + 14 files changed, 52 insertions(+), 44 deletions(-) diff --git a/obsrv/connector/batch/obsrv_dataset.py b/obsrv/connector/batch/obsrv_dataset.py index 38b878d..2bba1cc 100644 --- a/obsrv/connector/batch/obsrv_dataset.py +++ b/obsrv/connector/batch/obsrv_dataset.py @@ -1,8 +1,10 @@ import json import time + from pyspark.sql import DataFrame -from pyspark.sql.functions import lit, struct, to_json, from_json, length -from pyspark.sql.types import StructType, StructField, StringType +from pyspark.sql.functions import from_json, length, lit, struct, to_json +from pyspark.sql.types import StringType, StructField, StructType + from obsrv.utils import LoggerController logger = LoggerController(__name__) diff --git a/obsrv/connector/batch/source.py b/obsrv/connector/batch/source.py index 719c221..af0a379 100644 --- a/obsrv/connector/batch/source.py +++ b/obsrv/connector/batch/source.py @@ -1,17 +1,17 @@ +import json import time -from typing import Dict, Any, final, AnyStr, Iterator -from pyspark.sql import SparkSession, DataFrame +from abc import ABC, abstractmethod +from typing import Any, AnyStr, Dict, Iterator, final + from pyspark.conf import SparkConf +from pyspark.sql import DataFrame, SparkSession from obsrv.common import ObsrvException -from obsrv.models import ExecutionMetric, ErrorData -from obsrv.utils import EncryptionUtil, Config, LoggerController -from obsrv.connector import MetricsCollector, 
ConnectorInstance, ConnectorContext -from obsrv.connector.registry import ConnectorRegistry +from obsrv.connector import ConnectorContext, ConnectorInstance, MetricsCollector from obsrv.connector.batch.obsrv_dataset import ObsrvDataset - -from abc import ABC, abstractmethod -import json +from obsrv.connector.registry import ConnectorRegistry +from obsrv.models import ErrorData, ExecutionMetric +from obsrv.utils import Config, EncryptionUtil, LoggerController logger = LoggerController(__name__) @@ -204,13 +204,13 @@ def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): def get_metrics_schema(): from pyspark.sql.types import ( - StructType, - StructField, - StringType, + ArrayType, + DoubleType, LongType, MapType, - DoubleType, - ArrayType, + StringType, + StructField, + StructType, ) schema = StructType( diff --git a/obsrv/connector/metrics_collector.py b/obsrv/connector/metrics_collector.py index 9f2e29a..9bc8b25 100644 --- a/obsrv/connector/metrics_collector.py +++ b/obsrv/connector/metrics_collector.py @@ -1,6 +1,7 @@ -import uuid import time -from typing import List, Dict +import uuid +from typing import Dict, List + from obsrv.models import EventID, Metric, MetricContext, MetricData diff --git a/obsrv/connector/registry.py b/obsrv/connector/registry.py index 2daa912..ea2e905 100644 --- a/obsrv/connector/registry.py +++ b/obsrv/connector/registry.py @@ -1,6 +1,7 @@ import json from dataclasses import dataclass from typing import Optional + from obsrv.common import ObsrvException from obsrv.models import ErrorData from obsrv.utils import PostgresConnect diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 61292a1..9cab1f9 100644 --- a/obsrv/models/__init__.py +++ b/obsrv/models/__init__.py @@ -1,2 +1,2 @@ from .data_models import ErrorData, EventID, StatusCode -from .metric import Metric, MetricContext, MetricData, ExecutionMetric +from .metric import ExecutionMetric, Metric, MetricContext, MetricData diff --git 
a/obsrv/models/data_models.py b/obsrv/models/data_models.py index 727deff..e6fdd3f 100644 --- a/obsrv/models/data_models.py +++ b/obsrv/models/data_models.py @@ -1,5 +1,5 @@ -from enum import Enum from dataclasses import dataclass +from enum import Enum # from typing import Optional, Dict, Any diff --git a/obsrv/utils/__init__.py b/obsrv/utils/__init__.py index a82ac34..1c74912 100644 --- a/obsrv/utils/__init__.py +++ b/obsrv/utils/__init__.py @@ -1,5 +1,5 @@ from .config import Config -from .encyption import EncryptionUtil from .db_util import PostgresConnect -from .time import time_it +from .encyption import EncryptionUtil from .logger import LoggerController +from .time import time_it diff --git a/obsrv/utils/config.py b/obsrv/utils/config.py index db43316..5736f1c 100644 --- a/obsrv/utils/config.py +++ b/obsrv/utils/config.py @@ -1,7 +1,9 @@ +import operator +from functools import reduce + import yaml from yaml.loader import SafeLoader -from functools import reduce -import operator + from .logger import LoggerController logger = LoggerController(__name__) diff --git a/obsrv/utils/encyption.py b/obsrv/utils/encyption.py index 98152fa..d932cd3 100644 --- a/obsrv/utils/encyption.py +++ b/obsrv/utils/encyption.py @@ -1,4 +1,5 @@ -from base64 import b64encode, b64decode +from base64 import b64decode, b64encode + from Crypto.Cipher import AES from Crypto.Util.Padding import pad, unpad diff --git a/tests/batch_setup.py b/tests/batch_setup.py index c660145..1dfbe42 100644 --- a/tests/batch_setup.py +++ b/tests/batch_setup.py @@ -1,8 +1,9 @@ import os + import pytest -from testcontainers.postgres import PostgresContainer -from testcontainers.kafka import KafkaContainer import yaml +from testcontainers.kafka import KafkaContainer +from testcontainers.postgres import PostgresContainer from tests.create_tables import create_tables diff --git a/tests/create_tables.py b/tests/create_tables.py index f1b235a..2cfacea 100644 --- a/tests/create_tables.py +++ 
b/tests/create_tables.py @@ -1,7 +1,9 @@ -import yaml +import json import os + import psycopg2 -import json +import yaml + from obsrv.utils import EncryptionUtil diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py index 607c722..c01bb12 100644 --- a/tests/test_batch_connector.py +++ b/tests/test_batch_connector.py @@ -1,24 +1,20 @@ import os import unittest -import yaml -import pytest from typing import Any, Dict -from testcontainers.postgres import PostgresContainer -from testcontainers.kafka import KafkaContainer + +import pytest +import yaml from kafka import KafkaConsumer, TopicPartition -from pyspark.sql import SparkSession, DataFrame from pyspark.conf import SparkConf +from pyspark.sql import DataFrame, SparkSession +from testcontainers.kafka import KafkaContainer +from testcontainers.postgres import PostgresContainer -# from obsrv.common import ObsrvException -from obsrv.job.batch import get_base_conf +from obsrv.connector import ConnectorContext, MetricsCollector from obsrv.connector.batch import ISourceConnector, SourceConnector -from obsrv.connector import ConnectorContext -from obsrv.connector import MetricsCollector - -# from obsrv.models import ErrorData, StatusCode - -from tests.create_tables import create_tables +from obsrv.job.batch import get_base_conf from tests.batch_setup import setup_obsrv_database +from tests.create_tables import create_tables class TestSource(ISourceConnector): diff --git a/tests/test_connector_registry.py b/tests/test_connector_registry.py index 37daa5a..119c526 100644 --- a/tests/test_connector_registry.py +++ b/tests/test_connector_registry.py @@ -1,9 +1,10 @@ +import os import unittest + import pytest -import os import yaml -from obsrv.connector.registry import ConnectorRegistry, ConnectorInstance +from obsrv.connector.registry import ConnectorInstance, ConnectorRegistry from tests.batch_setup import setup_obsrv_database diff --git a/tests/test_encryption_utils.py b/tests/test_encryption_utils.py 
index 9d66dd0..b034a90 100644 --- a/tests/test_encryption_utils.py +++ b/tests/test_encryption_utils.py @@ -1,4 +1,5 @@ import unittest + from obsrv.utils import EncryptionUtil From 1242455e1ea6c1f396f2a6d80ad148982206a6e2 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Sun, 12 May 2024 11:09:44 +0530 Subject: [PATCH 06/13] Sanketika-Obsrv/issue-tracker#128: remove unused imports --- obsrv/common/__init__.py | 1 + obsrv/common/exception.py | 2 +- obsrv/connector/__init__.py | 1 + obsrv/connector/batch/__init__.py | 1 + obsrv/connector/batch/source.py | 2 +- obsrv/job/batch/__init__.py | 1 + obsrv/models/__init__.py | 1 + obsrv/utils/__init__.py | 1 + obsrv/utils/config.py | 1 - poetry.lock | 140 +++++++++++++++++++++++++++++- pyproject.toml | 6 +- tests/batch_setup.py | 1 - tests/test_batch_connector.py | 5 +- tests/test_connector_registry.py | 2 +- 14 files changed, 152 insertions(+), 13 deletions(-) diff --git a/obsrv/common/__init__.py b/obsrv/common/__init__.py index 1b8f04f..da63e5f 100644 --- a/obsrv/common/__init__.py +++ b/obsrv/common/__init__.py @@ -1 +1,2 @@ +# autoflake: skip_file from .exception import ObsrvException diff --git a/obsrv/common/exception.py b/obsrv/common/exception.py index d504059..e2c6314 100644 --- a/obsrv/common/exception.py +++ b/obsrv/common/exception.py @@ -1,4 +1,4 @@ -from obsrv.models import ErrorData +# from obsrv.models import ErrorData from obsrv.utils import LoggerController logger = LoggerController(__name__) diff --git a/obsrv/connector/__init__.py b/obsrv/connector/__init__.py index af0f587..761cb16 100644 --- a/obsrv/connector/__init__.py +++ b/obsrv/connector/__init__.py @@ -1,2 +1,3 @@ +# autoflake: skip_file from .metrics_collector import MetricsCollector from .registry import ConnectorContext, ConnectorInstance diff --git a/obsrv/connector/batch/__init__.py b/obsrv/connector/batch/__init__.py index ad9de37..1552873 100644 --- a/obsrv/connector/batch/__init__.py +++ b/obsrv/connector/batch/__init__.py @@ -1,2 +1,3 @@ 
+# autoflake: skip_file from .obsrv_dataset import ObsrvDataset from .source import ISourceConnector, SourceConnector diff --git a/obsrv/connector/batch/source.py b/obsrv/connector/batch/source.py index af0a379..ab19d88 100644 --- a/obsrv/connector/batch/source.py +++ b/obsrv/connector/batch/source.py @@ -1,7 +1,7 @@ import json import time from abc import ABC, abstractmethod -from typing import Any, AnyStr, Dict, Iterator, final +from typing import Any, AnyStr, Dict, final from pyspark.conf import SparkConf from pyspark.sql import DataFrame, SparkSession diff --git a/obsrv/job/batch/__init__.py b/obsrv/job/batch/__init__.py index d2719b5..2d58f4d 100644 --- a/obsrv/job/batch/__init__.py +++ b/obsrv/job/batch/__init__.py @@ -1 +1,2 @@ +# autoflake: skip_file from .utils import get_base_conf diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 9cab1f9..4598e4a 100644 --- a/obsrv/models/__init__.py +++ b/obsrv/models/__init__.py @@ -1,2 +1,3 @@ +# autoflake: skip_file from .data_models import ErrorData, EventID, StatusCode from .metric import ExecutionMetric, Metric, MetricContext, MetricData diff --git a/obsrv/utils/__init__.py b/obsrv/utils/__init__.py index 1c74912..b279fa3 100644 --- a/obsrv/utils/__init__.py +++ b/obsrv/utils/__init__.py @@ -1,3 +1,4 @@ +# autoflake: skip_file from .config import Config from .db_util import PostgresConnect from .encyption import EncryptionUtil diff --git a/obsrv/utils/config.py b/obsrv/utils/config.py index 5736f1c..5c5ecb5 100644 --- a/obsrv/utils/config.py +++ b/obsrv/utils/config.py @@ -2,7 +2,6 @@ from functools import reduce import yaml -from yaml.loader import SafeLoader from .logger import LoggerController diff --git a/poetry.lock b/poetry.lock index 5eb0268..d5caf74 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,66 @@ # This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
+[[package]] +name = "autoflake" +version = "2.3.1" +description = "Removes unused imports and unused variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"}, + {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"}, +] + +[package.dependencies] +pyflakes = ">=3.0.0" +tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} + +[[package]] +name = "black" +version = "24.4.2" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, + {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, + {file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"}, + {file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"}, + {file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"}, + {file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"}, + {file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"}, + {file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"}, + {file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"}, + {file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"}, + {file = "black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"}, + {file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"}, + {file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"}, + {file = "black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"}, + {file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"}, + {file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"}, + {file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"}, + {file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"}, + {file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"}, + {file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"}, + {file = "black-24.4.2-py3-none-any.whl", hash = "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"}, + {file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = 
">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "certifi" version = "2024.2.2" @@ -110,6 +171,20 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "colorama" version = "0.4.6" @@ -242,6 +317,20 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "isort" +version = "5.13.2" +description = "A Python utility / library to sort Python imports." 
+optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, + {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, +] + +[package.extras] +colors = ["colorama (>=0.4.6)"] + [[package]] name = "kafka-python-ng" version = "2.2.2" @@ -260,6 +349,17 @@ lz4 = ["lz4"] snappy = ["python-snappy"] zstd = ["zstandard"] +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + [[package]] name = "packaging" version = "24.0" @@ -271,6 +371,33 @@ files = [ {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "platformdirs" +version = "4.2.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"}, + {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + [[package]] name = "pluggy" version = "1.5.0" @@ -419,6 +546,17 @@ files = [ {file = "pycryptodome-3.20.0.tar.gz", hash = "sha256:09609209ed7de61c2b560cc5c8c4fbf892f8b15b1faf7e4cbffac97db1fffda7"}, ] +[[package]] +name = "pyflakes" +version = "3.2.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, + {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, +] + [[package]] name = "pyspark" version = "3.5.1" @@ -732,4 +870,4 @@ batch = ["pyspark"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "73c7e1f23d313ff232e91e8547f16ffe7626fca8c17fc5367584546f9dc83fe1" +content-hash = "69f24bcc17d16a4dce9485131eb09775008ee561074708c2c8ef707c3d1e3245" diff --git a/pyproject.toml b/pyproject.toml index 145da2f..e5577ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,9 +25,9 @@ pytest = "^8.1.1" coverage = "^7.4.3" pyspark = "^3.5.1" testcontainers = {extras = ["kafka", "postgres"], version = "^4.4.0"} - -[tool.poetry.group.batch.dependencies] -pyspark = "^3.5.1" +black = "^24.4.2" +isort = "^5.13.2" +autoflake = "^2.3.1" [build-system] requires = ["poetry-core"] diff --git a/tests/batch_setup.py b/tests/batch_setup.py 
index 1dfbe42..a85620b 100644 --- a/tests/batch_setup.py +++ b/tests/batch_setup.py @@ -47,6 +47,5 @@ def remove_container(): os.remove(os.path.join(os.path.dirname(__file__), "config/config.yaml")) except FileNotFoundError: print("config file already removed") - pass request.addfinalizer(remove_container) diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py index c01bb12..ddb108e 100644 --- a/tests/test_batch_connector.py +++ b/tests/test_batch_connector.py @@ -7,14 +7,11 @@ from kafka import KafkaConsumer, TopicPartition from pyspark.conf import SparkConf from pyspark.sql import DataFrame, SparkSession -from testcontainers.kafka import KafkaContainer -from testcontainers.postgres import PostgresContainer from obsrv.connector import ConnectorContext, MetricsCollector from obsrv.connector.batch import ISourceConnector, SourceConnector from obsrv.job.batch import get_base_conf -from tests.batch_setup import setup_obsrv_database -from tests.create_tables import create_tables +from tests.batch_setup import setup_obsrv_database # noqa class TestSource(ISourceConnector): diff --git a/tests/test_connector_registry.py b/tests/test_connector_registry.py index 119c526..e024059 100644 --- a/tests/test_connector_registry.py +++ b/tests/test_connector_registry.py @@ -5,7 +5,7 @@ import yaml from obsrv.connector.registry import ConnectorInstance, ConnectorRegistry -from tests.batch_setup import setup_obsrv_database +from tests.batch_setup import setup_obsrv_database # noqa @pytest.mark.usefixtures("setup_obsrv_database") From 2567797f586038ac3d2edb21c5d4db5c92f35fb7 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Mon, 13 May 2024 09:58:44 +0530 Subject: [PATCH 07/13] Sanketika-Obsrv/issue-tracker#128: add data_format and make encryption default --- obsrv/connector/registry.py | 2 +- obsrv/models/__init__.py | 2 +- obsrv/models/data_models.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/obsrv/connector/registry.py 
b/obsrv/connector/registry.py index ea2e905..19335d3 100644 --- a/obsrv/connector/registry.py +++ b/obsrv/connector/registry.py @@ -13,7 +13,7 @@ class ConnectorContext: dataset_id: str connector_instance_id: str connector_type: str - # data_format: str + data_format: str entry_topic: Optional[str] = None building_block: Optional[str] = None env: Optional[str] = None diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 4598e4a..8ef49f4 100644 --- a/obsrv/models/__init__.py +++ b/obsrv/models/__init__.py @@ -1,3 +1,3 @@ # autoflake: skip_file -from .data_models import ErrorData, EventID, StatusCode +from .data_models import ErrorData, EventID, StatusCode, ExecutionState from .metric import ExecutionMetric, Metric, MetricContext, MetricData diff --git a/obsrv/models/data_models.py b/obsrv/models/data_models.py index e6fdd3f..de74ae6 100644 --- a/obsrv/models/data_models.py +++ b/obsrv/models/data_models.py @@ -19,6 +19,10 @@ class ErrorData: error_code: str error_msg: str +class ExecutionState(Enum): + RUNNING = "running" + NOT_RUNNING = "not_running" + QUEUED = "queued" # @dataclass # class ContextData: From 895e68418b3ed1ba465465b47c1e90cba1bf957a Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Mon, 13 May 2024 11:50:55 +0530 Subject: [PATCH 08/13] Sanketika-Obsrv/issue-tracker#128: ref connector_type from registry --- obsrv/connector/registry.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/obsrv/connector/registry.py b/obsrv/connector/registry.py index 19335d3..1a8d425 100644 --- a/obsrv/connector/registry.py +++ b/obsrv/connector/registry.py @@ -98,9 +98,10 @@ class ConnectorRegistry: def get_connector_instances(connector_id, postgres_config): postgres_connect = PostgresConnect(postgres_config) query = """ - SELECT ci.*, d.dataset_config + SELECT ci.*, d.dataset_config, cr.connector_type FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id + JOIN connector_registry cr on ci.connector_id = cr.id WHERE 
ci.connector_id = '{}' AND d.status = 'Live' AND ci.status = 'Live' """.format( connector_id @@ -112,9 +113,10 @@ def get_connector_instances(connector_id, postgres_config): def get_connector_instance(connector_instance_id, postgres_config): postgres_connect = PostgresConnect(postgres_config) query = """ - SELECT ci.*, d.dataset_config + SELECT ci.*, d.dataset_config, cr.connector_type FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id + JOIN connector_registry cr on ci.connector_id = cr.id WHERE ci.id = '{}' AND d.status = 'Live' AND ci.status = 'Live' """.format( connector_instance_id From a0233aaff96c1300bddee5f9745327e50836fe72 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Mon, 13 May 2024 17:51:51 +0530 Subject: [PATCH 09/13] Sanketika-Obsrv/issue-tracker#128: connector registry schema update --- obsrv/connector/registry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/obsrv/connector/registry.py b/obsrv/connector/registry.py index 1a8d425..3683049 100644 --- a/obsrv/connector/registry.py +++ b/obsrv/connector/registry.py @@ -98,7 +98,7 @@ class ConnectorRegistry: def get_connector_instances(connector_id, postgres_config): postgres_connect = PostgresConnect(postgres_config) query = """ - SELECT ci.*, d.dataset_config, cr.connector_type + SELECT ci.*, d.dataset_config, cr.type FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id JOIN connector_registry cr on ci.connector_id = cr.id @@ -113,7 +113,7 @@ def get_connector_instances(connector_id, postgres_config): def get_connector_instance(connector_instance_id, postgres_config): postgres_connect = PostgresConnect(postgres_config) query = """ - SELECT ci.*, d.dataset_config, cr.connector_type + SELECT ci.*, d.dataset_config, cr.type FROM connector_instances as ci JOIN datasets d ON ci.dataset_id = d.id JOIN connector_registry cr on ci.connector_id = cr.id @@ -149,7 +149,7 @@ def parse_connector_instance(rs, postgres_config) -> ConnectorInstance: id = 
rs["id"] dataset_id = rs["dataset_id"] connector_id = rs["connector_id"] - connector_type = rs["connector_type"] + connector_type = rs["type"] connector_config = rs["connector_config"] data_format = rs["data_format"] operations_config = rs["operations_config"] From d3fb99b2e0d7da80aca08dc0832008800fda37b8 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Tue, 21 May 2024 13:13:21 +0530 Subject: [PATCH 10/13] Sanketika-Obsrv/issue-tracker#128: update test cases --- tests/create_tables.py | 6 ++---- tests/test_conf.yaml | 21 --------------------- 2 files changed, 2 insertions(+), 25 deletions(-) delete mode 100644 tests/test_conf.yaml diff --git a/tests/create_tables.py b/tests/create_tables.py index 2cfacea..fb6586f 100644 --- a/tests/create_tables.py +++ b/tests/create_tables.py @@ -58,6 +58,7 @@ def create_tables(config): id TEXT PRIMARY KEY, dataset_id TEXT NOT NULL REFERENCES datasets (id), connector_id TEXT NOT NULL REFERENCES connector_registry (id), + data_format TEXT NOT NULL DEFAULT 'jsonl', connector_type TEXT NOT NULL, connector_config json NOT NULL, operations_config json NOT NULL, @@ -93,10 +94,7 @@ def create_tables(config): """ connector_config = json.dumps({"type": "local"}) - enc_config = { - "is_encrypted": True, - "connector_config": enc.encrypt(connector_config), - } + enc_config = enc.encrypt(connector_config) ins_ci = """ INSERT INTO connector_instances (id, dataset_id, connector_id, connector_type, connector_config, operations_config, status, connector_state, connector_stats, created_by, updated_by, created_date, updated_date, published_date) VALUES diff --git a/tests/test_conf.yaml b/tests/test_conf.yaml deleted file mode 100644 index d39a558..0000000 --- a/tests/test_conf.yaml +++ /dev/null @@ -1,21 +0,0 @@ -postgres: - dbname: postgres - user: postgres - password: postgres - host: localhost - port: 5432 - -kafka: - broker-servers: localhost:9092 - telemetry-topic: test.telemetry - connector-metrics-topic: test.metrics - producer: - 
compression: gzip - max-request-size: 1000000 # 1MB {1M: 1000000, 10M: 10000000, 5M: 5000000} - -obsrv_encryption_key: random_32_byte_encryption_string - -connector_instance_id: test.new-york-taxi-data.1 - -building-block: py-sdk-test -env: local \ No newline at end of file From b2f654de00820bad7fe5c5fa6338b9b25ce6095a Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Mon, 10 Jun 2024 15:05:54 +0530 Subject: [PATCH 11/13] Sanketika-Obsrv/issue-tracker#237: read config and instance id from cmd args --- obsrv/connector/batch/source.py | 48 ++++++++++++++++++++++++++++++--- obsrv/models/__init__.py | 2 +- obsrv/models/data_models.py | 2 ++ tests/test_batch_connector.py | 5 ++-- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/obsrv/connector/batch/source.py b/obsrv/connector/batch/source.py index ab19d88..774ed47 100644 --- a/obsrv/connector/batch/source.py +++ b/obsrv/connector/batch/source.py @@ -1,7 +1,8 @@ +import argparse import json import time from abc import ABC, abstractmethod -from typing import Any, AnyStr, Dict, final +from typing import Any, Dict, final from pyspark.conf import SparkConf from pyspark.sql import DataFrame, SparkSession @@ -129,11 +130,29 @@ def process_result(result, ctx, config): return (valid_records_count, failed_records_count, end_time - start_time) @final - def process(connector: ISourceConnector, config_file_path: AnyStr, **kwargs): + def process(connector: ISourceConnector, **kwargs): + args = SourceConnector.parse_args() + + config_file_path = ( + args.config_file_path + if args.config_file_path + else kwargs.get("config_file_path", None) + ) + + if config_file_path is None: + raise Exception("Config file path not found") start_time = time.time() config = Config(config_file_path) - connector_instance_id = config.find("connector_instance_id") + connector_instance_id = ( + args.connector_instance_id + if args.connector_instance_id + else config.find("connector_instance_id", None) + ) + + if connector_instance_id is None: + 
raise Exception("Connector instance id not found") + connector_instance = SourceConnector.get_connector_instance( connector_instance_id, config.find("postgres") ) @@ -251,3 +270,26 @@ def get_metrics_schema(): ) return schema + + @final + def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-f", + "--config-file-path", + help="Path to the config file containing the default connector configurations", + ) + + parser.add_argument( + "-c", + "--connector-instance-id", + help="connector instance id", + ) + + parser.add_argument( + "--connector.metadata.id", + help="connector id", + ) + + args = parser.parse_args() + return args diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 8ef49f4..1eef693 100644 --- a/obsrv/models/__init__.py +++ b/obsrv/models/__init__.py @@ -1,3 +1,3 @@ # autoflake: skip_file -from .data_models import ErrorData, EventID, StatusCode, ExecutionState +from .data_models import ErrorData, EventID, ExecutionState, StatusCode from .metric import ExecutionMetric, Metric, MetricContext, MetricData diff --git a/obsrv/models/data_models.py b/obsrv/models/data_models.py index de74ae6..b21dc7d 100644 --- a/obsrv/models/data_models.py +++ b/obsrv/models/data_models.py @@ -19,11 +19,13 @@ class ErrorData: error_code: str error_msg: str + class ExecutionState(Enum): RUNNING = "running" NOT_RUNNING = "not_running" QUEUED = "queued" + # @dataclass # class ContextData: # connector_id: str diff --git a/tests/test_batch_connector.py b/tests/test_batch_connector.py index ddb108e..b0021ab 100644 --- a/tests/test_batch_connector.py +++ b/tests/test_batch_connector.py @@ -1,6 +1,6 @@ import os import unittest -from typing import Any, Dict +from typing import Any, Dict, Iterator import pytest import yaml @@ -21,12 +21,11 @@ def process( ctx: ConnectorContext, connector_config: Dict[Any, Any], metrics_collector: MetricsCollector, - ) -> DataFrame: + ) -> Iterator[DataFrame]: df = 
sc.read.format("json").load("tests/sample_data/nyt_data_100.json.gz") yield df df1 = sc.read.format("json").load("tests/sample_data/nyt_data_100.json") - yield df1 def get_spark_conf(self, connector_config) -> SparkConf: From 0ff1675fc488c3f6cfaef4cbb347e5d976c64062 Mon Sep 17 00:00:00 2001 From: Ravi Mula Date: Tue, 18 Jun 2024 12:43:39 +0530 Subject: [PATCH 12/13] Sanketika-Obsrv/issue-tracker#128: update obsrv meta --- obsrv/connector/batch/obsrv_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/obsrv/connector/batch/obsrv_dataset.py b/obsrv/connector/batch/obsrv_dataset.py index 2bba1cc..53adb0d 100644 --- a/obsrv/connector/batch/obsrv_dataset.py +++ b/obsrv/connector/batch/obsrv_dataset.py @@ -3,7 +3,7 @@ from pyspark.sql import DataFrame from pyspark.sql.functions import from_json, length, lit, struct, to_json -from pyspark.sql.types import StringType, StructField, StructType +from pyspark.sql.types import StringType, StructField, StructType, LongType from obsrv.utils import LoggerController @@ -43,10 +43,10 @@ def append_obsrv_meta(self, ctx): obsrv_meta_schema = StructType( [ - StructField("syncts", StringType(), True), - StructField("flags", StringType(), True), - StructField("timespans", StringType(), True), - StructField("error", StringType(), True), + StructField("syncts", LongType(), True), + StructField("flags", StructType(), True), + StructField("timespans", StructType(), True), + StructField("error", StructType(), True), StructField("source", StructType(source_meta), True), ] ) From 8de4a82aafe2b53258dcb033190eac481d3fc932 Mon Sep 17 00:00:00 2001 From: SurabhiAngadi Date: Mon, 5 Aug 2024 11:43:50 +0530 Subject: [PATCH 13/13] fix: remove execution state --- obsrv/models/__init__.py | 2 +- obsrv/models/data_models.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/obsrv/models/__init__.py b/obsrv/models/__init__.py index 1eef693..4598e4a 100644 --- a/obsrv/models/__init__.py +++ 
b/obsrv/models/__init__.py @@ -1,3 +1,3 @@ # autoflake: skip_file -from .data_models import ErrorData, EventID, ExecutionState, StatusCode +from .data_models import ErrorData, EventID, StatusCode from .metric import ExecutionMetric, Metric, MetricContext, MetricData diff --git a/obsrv/models/data_models.py b/obsrv/models/data_models.py index b21dc7d..e6fdd3f 100644 --- a/obsrv/models/data_models.py +++ b/obsrv/models/data_models.py @@ -20,12 +20,6 @@ class ErrorData: error_msg: str -class ExecutionState(Enum): - RUNNING = "running" - NOT_RUNNING = "not_running" - QUEUED = "queued" - - # @dataclass # class ContextData: # connector_id: str