From dfe0e711fb46e0af23aaa167651a441f32ab9187 Mon Sep 17 00:00:00 2001 From: joocer Date: Mon, 22 Jul 2024 21:40:18 +0100 Subject: [PATCH 01/16] WIP --- tarchia/models/eventable.py | 118 +++++++++++++++++++++++++ tarchia/models/metadata_models.py | 16 +++- tarchia/utils/serde.py | 1 - tarchia/v1/routes/commit_management.py | 1 + tarchia/v1/routes/data_management.py | 18 +++- tarchia/v1/routes/table_management.py | 54 +++++++---- tests/components/test_eventable.py | 80 +++++++++++++++++ 7 files changed, 266 insertions(+), 22 deletions(-) create mode 100644 tarchia/models/eventable.py create mode 100644 tests/components/test_eventable.py diff --git a/tarchia/models/eventable.py b/tarchia/models/eventable.py new file mode 100644 index 0000000..bbfcbd9 --- /dev/null +++ b/tarchia/models/eventable.py @@ -0,0 +1,118 @@ +""" +This module provides the functionality to manage event subscriptions and notifications +for a variety of events using a base class `Eventable`. It includes definitions for +supported events, subscription management, and asynchronous notification handling +with retry logic. + +Classes: + Subscription(BaseModel): Represents a subscription with user, event, and URL attributes. + SupportedEvents(Enum): Defines the set of supported events. + Eventable: Base class to manage event subscriptions and notifications. + +Methods: + subscribe(user, event, url): Subscribes a user to an event with a specific URL. + unsubscribe(user, event, url): Unsubscribes a user from an event with a specific URL. + trigger_event(event, data): Triggers an event and notifies all subscribed URLs. + notify_subscribers(url, data): Notifies a single subscriber asynchronously with retries. + _send_request_with_retries(url, data): Sends an HTTP request with retry logic. + +Example Usage: + eventable = Eventable() + eventable.subscribe(user='user1', event='dataset_created', url='http://example.com/webhook_created') + eventable.trigger_event('dataset_created', {'table_id': '123', 'name': 'ExampleTable'}) +""" + +import concurrent.futures +from enum import Enum +from typing import List + +import requests +from orso.tools import retry +from pydantic import BaseModel +from requests.exceptions import ConnectionError +from requests.exceptions import Timeout + + +def is_valid_url(url: str) -> bool: + """ + Check if the given string is a valid URL. + + Parameters: + url (str): The input string to be checked. + + Returns: + bool: True if the input string is a valid URL, False otherwise. + """ + from urllib.parse import urlparse + + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + +class Subscription(BaseModel): + user: str + event: str + url: str + + +class Eventable: + class EventTypes(Enum): + UNDEFINED = "UNDEFINED" + + _executor = None + subscriptions: List[Subscription] = [] + + @classmethod + def _ensure_executor(cls): + if cls._executor is None or cls._executor._shutdown: + cls._executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) + + def subscribe(self, user: str, event: str, url: str): + """Subscribe a URL to a specific event for a user.""" + if not event in self.EventTypes.__members__: + raise ValueError(f"Event '{event}' is not supported.") + if not is_valid_url(url): + raise ValueError(f"URL does not appear to be valid") + subscription = Subscription(user=user, event=event, url=url) + self.subscriptions.append(subscription) + + def unsubscribe(self, user: str, event: str, url: str): + """Unsubscribe a URL from a specific event for a user.""" + self.subscriptions = [ + s + for s in self.subscriptions + if not (s.user == user and s.event == event and s.url == url) + ] + + def trigger_event(self, event: str, data: dict): + """Trigger an event and notify all subscribers.""" + if event not in self.EventTypes.__members__: + raise ValueError(f"Event '{event}' is not supported.") + for subscription in self.subscriptions: + if subscription.event == event: + self.notify_subscribers(subscription.url, data) + + def notify_subscribers(self, url: str, data: dict): + """Notify a single subscriber asynchronously with retries.""" + try: + self._ensure_executor() + if is_valid_url(url): + self._executor.submit(self._send_request_with_retries, url, data) + except Exception as err: + print(f"[TARCHIA] Error notifying subscribers. {err}") + + @retry( + max_tries=3, + backoff_seconds=5, + exponential_backoff=True, + max_backoff=8, + retry_exceptions=(ConnectionError, Timeout), + ) + def _send_request_with_retries(self, url: str, data: dict): + """Send the actual HTTP request with retries.""" + response = requests.post(url, json=data, timeout=10) + response.raise_for_status() + print(f"Notification sent to {url}: {response.status_code}") diff --git a/tarchia/models/metadata_models.py b/tarchia/models/metadata_models.py index 6502e94..68e6988 100644 --- a/tarchia/models/metadata_models.py +++ b/tarchia/models/metadata_models.py @@ -9,6 +9,7 @@ from tarchia.exceptions import DataEntryError +from .eventable import Eventable from .tarchia_base import TarchiaBaseModel @@ -175,13 +176,18 @@ def history_entry(self): ) -class TableCatalogEntry(TarchiaBaseModel): +class TableCatalogEntry(TarchiaBaseModel, Eventable): """ The Catalog entry for a table. This is intended to be stored in a document store like FireStore or MongoDB. """ + class EventTypes(Enum): + """Supported Eventables""" + + NEW_COMMIT = "NEW_COMMIT" + name: str steward: str owner: str @@ -207,7 +213,7 @@ def is_valid(self): return True -class OwnerEntry(TarchiaBaseModel): +class OwnerEntry(Eventable, TarchiaBaseModel): """ Model for owners. @@ -218,6 +224,12 @@ class OwnerEntry(TarchiaBaseModel): memberships (List(str)): Identifiers to automatically map users to Owners """ + class EventTypes(Enum): + """Supported Eventables""" + + DATASET_CREATED = "DATASET_CREATED" + DATASET_DELETED = "DATASET_DELETED" + name: str owner_id: str type: OwnerType diff --git a/tarchia/utils/serde.py b/tarchia/utils/serde.py index 665a67b..243f2bc 100644 --- a/tarchia/utils/serde.py +++ b/tarchia/utils/serde.py @@ -2,7 +2,6 @@ from typing import Any from typing import Dict -import orjson from pydantic import BaseModel diff --git a/tarchia/v1/routes/commit_management.py b/tarchia/v1/routes/commit_management.py index a4f1333..da92f0d 100644 --- a/tarchia/v1/routes/commit_management.py +++ b/tarchia/v1/routes/commit_management.py @@ -72,6 +72,7 @@ async def get_table_commit( table_definition.pop("last_updated_ms", None) table_definition.pop("partitioning") table_definition.pop("location") + table_definition.pop("subscriptions") table_definition["commit_sha"] = commit_sha table_definition["commit_url"] = ( f"{base_url}/v1/tables/{catalog_entry.owner}/{catalog_entry.name}/commits/{commit_sha}" diff --git a/tarchia/v1/routes/data_management.py b/tarchia/v1/routes/data_management.py index eaff0cf..cfa72b0 100644 --- a/tarchia/v1/routes/data_management.py +++ b/tarchia/v1/routes/data_management.py @@ -6,6 +6,7 @@ import orjson from fastapi import APIRouter from fastapi import HTTPException +from fastapi import Request from tarchia import config from tarchia.constants import COMMITS_ROOT @@ -161,7 +162,7 @@ async def start_transaction(table: TableRequest): @router.post("/transactions/commit") -async def commit_transaction(commit_request: CommitRequest): +async def commit_transaction(request: Request, commit_request: CommitRequest): """ Commits a transaction by verifying it, updating the manifest and commit, and updating the catalog. @@ -183,6 +184,7 @@ async def commit_transaction(commit_request: CommitRequest): from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_table + base_url = request.url.scheme + "://" + request.url.netloc timestamp = int(time.time_ns() / 1e6) uuid = generate_uuid() @@ -259,15 +261,27 @@ async def commit_transaction(commit_request: CommitRequest): catalog_entry.current_history = uuid catalog_provider.update_table(catalog_entry.table_id, catalog_entry) + # trigger webhooks - this should be async so we don't wait for the outcome + catalog_entry.notify_subscribers( + catalog_entry.EventTypes.NEW_COMMIT, + { + "event": "NEW_COMMIT", + "table": f"{catalog_entry.owner}.{catalog_entry.name}", + "commit": commit.commit_sha, + "url": f"{base_url}/v1/tables/{catalog_entry.owner}/{catalog_entry.name}/commits/{commit.commit_sha}", + }, + ) + return { + "table": f"{catalog_entry.owner}.{catalog_entry.name}", "message": "Transaction committed successfully", "transaction": transaction.transaction_id, "commit": commit.commit_sha, + "url": f"{base_url}/v1/tables/{catalog_entry.owner}/{catalog_entry.name}/commits/{commit.commit_sha}", } except TransactionError as e: raise HTTPException(status_code=400, detail=str(e)) from e except Exception as e: - raise e raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/tarchia/v1/routes/table_management.py b/tarchia/v1/routes/table_management.py index 4650ec1..370c052 100644 --- a/tarchia/v1/routes/table_management.py +++ b/tarchia/v1/routes/table_management.py @@ -69,7 +69,8 @@ async def list_tables(owner: str, request: Request): @router.post("/tables/{owner}", response_class=ORJSONResponse) async def create_table( - request: CreateTableRequest, + request: Request, + table_definition: CreateTableRequest, owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), ): """ @@ -84,46 +85,57 @@ async def create_table( from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_owner - # check if we have a table with that name already + base_url = request.url.scheme + "://" + request.url.netloc - catalog_entry = catalog_provider.get_table(owner=owner, table=request.name) + # check if we have a table with that name already + catalog_entry = catalog_provider.get_table(owner=owner, table=table_definition.name) if catalog_entry: # return a 409 - raise AlreadyExistsError(entity=request.name) + raise AlreadyExistsError(entity=table_definition.name) # can we find the owner? - identify_owner(name=owner) + owner_entry = identify_owner(name=owner) table_id = generate_uuid() # We create tables without any commit, at create-time the table has no data and some # table types (external) we never record commits for. new_table = TableCatalogEntry( - name=request.name, + name=table_definition.name, owner=owner, - steward=request.steward, + steward=table_definition.steward, table_id=table_id, format_version=1, - location=request.location, - partitioning=request.partitioning, - visibility=request.visibility, - permissions=request.permissions, - disposition=request.disposition, - metadata=request.metadata, + location=table_definition.location, + partitioning=table_definition.partitioning, + visibility=table_definition.visibility, + permissions=table_definition.permissions, + disposition=table_definition.disposition, + metadata=table_definition.metadata, current_commit_sha=None, - current_schema=request.table_schema, + current_schema=table_definition.table_schema, last_updated_ms=int(time.time_ns() / 1e6), - encryption_details=request.encryption_details, + encryption_details=table_definition.encryption_details, ) # Save the table to the Catalog catalog_provider.update_table(table_id=new_table.table_id, entry=new_table) # create the metadata folder, put a file with the table name in there - storage_provider.write_blob(f"{METADATA_ROOT}/{owner}/{table_id}/{request.name}", b"") + storage_provider.write_blob(f"{METADATA_ROOT}/{owner}/{table_id}/{table_definition.name}", b"") + + # trigger webhooks - this should be async so we don't wait for the outcome + owner_entry.notify_subscribers( + owner_entry.EventTypes.DATASET_CREATED, + { + "event": "DATASET_CREATED", + "table": f"{owner}.{table_definition.name}", + "url": f"{base_url}/v1/tables/{owner}/{table_definition}", + }, + ) return { "message": "Table Created", - "table": f"{owner}.{request.name}", + "table": f"{owner}.{table_definition.name}", } @@ -176,8 +188,10 @@ async def delete_table( The metadata and data files for this table is NOT deleted. """ + from tarchia.utils.catalogs import identify_owner from tarchia.utils.catalogs import identify_table + owner_entry = identify_owner(name=owner) catalog_entry = identify_table(owner=owner, table=table) table_id = catalog_entry.table_id @@ -189,6 +203,12 @@ async def delete_table( f"{METADATA_ROOT}/{owner}/{table_id}/deleted.json", catalog_entry.serialize() ) + # trigger webhooks - this should be async so we don't wait for the outcome + owner_entry.notify_subscribers( + owner_entry.EventTypes.DATASET_DELETED, + {"event": "DATASET_DELETED", "table": f"{owner}.{table}"}, + ) + return { "message": "Table Deleted", "table": f"{owner}.{table}", diff --git a/tests/components/test_eventable.py b/tests/components/test_eventable.py new file mode 100644 index 0000000..02463d9 --- /dev/null +++ b/tests/components/test_eventable.py @@ -0,0 +1,80 @@ + +import sys +import os +import pytest +from orso.types import OrsoTypes + +os.environ["CATALOG_NAME"] = "test_catalog.json" +os.environ["TARCHIA_DEBUG"] = "TRUE" + +sys.path.insert(1, os.path.join(sys.path[0], "../..")) +import pytest +from unittest.mock import patch, call + +from tarchia.models import TableCatalogEntry, Schema, Column + +def test_subscribe(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') + assert len(eventable.subscriptions) == 1 + assert eventable.subscriptions[0].user == 'user1' + assert eventable.subscriptions[0].event == 'NEW_COMMIT' + assert eventable.subscriptions[0].url == 'http://example.com/webhook_created' + +def test_unsubscribe(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') + eventable.unsubscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') + assert len(eventable.subscriptions) == 0 + +def test_trigger_event(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') + + with patch('requests.post') as mock_post: + eventable.trigger_event('NEW_COMMIT', {'table_id': '123', 'name': 'ExampleTable'}) + assert mock_post.called + mock_post.assert_called_with('http://example.com/webhook_created', json={'table_id': '123', 'name': 'ExampleTable'}, timeout=10) + +def test_trigger_event_not_supported(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + with pytest.raises(ValueError): + eventable.trigger_event('unsupported_event', {'table_id': '123', 'name': 'ExampleTable'}) + +def test_subscribe_not_supported_event(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + with pytest.raises(ValueError): + eventable.subscribe(user='user1', event='unsupported_event', url='http://example.com/webhook') + +def test_notify_subscribers(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + with patch.object(eventable, '_send_request_with_retries') as mock_send_request: + eventable.notify_subscribers('http://example.com/webhook', {'table_id': '123', 'name': 'ExampleTable'}) + eventable._executor.shutdown(wait=True) + mock_send_request.assert_called_once_with('http://example.com/webhook', {'table_id': '123', 'name': 'ExampleTable'}) + +def test_send_request_with_retries(): + eventable = TableCatalogEntry( + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + ) + with patch('requests.post') as mock_post: + mock_post.return_value.status_code = 200 + eventable._send_request_with_retries('http://example.com/webhook', {'table_id': '123', 'name': 'ExampleTable'}) + mock_post.assert_called_with('http://example.com/webhook', json={'table_id': '123', 'name': 'ExampleTable'}, timeout=10) + +if __name__ == "__main__": # pragma: no cover + from tests.tools import run_tests + + run_tests() \ No newline at end of file From 868ea441324badc073cbed33505b111c1f91255f Mon Sep 17 00:00:00 2001 From: XB500 Date: Mon, 22 Jul 2024 20:40:36 +0000 Subject: [PATCH 02/16] Tarchia Version 0.0.0-alpha.123 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index 4161138..568abc4 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 122 +__build__ = 123 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From a7d2adc02863411d4c95a8cb7c0cff54a47d808b Mon Sep 17 00:00:00 2001 From: joocer Date: Tue, 23 Jul 2024 18:54:44 +0100 Subject: [PATCH 03/16] restructure --- README.md | 11 + main.py | 8 +- tarchia/{v1 => api}/__init__.py | 0 tarchia/{ => api}/middlewares/__init__.py | 0 .../{ => api}/middlewares/audit_middleware.py | 0 .../middlewares/authorization_middleware.py | 0 tarchia/{v1/routes => api/v1}/__init__.py | 0 tarchia/api/v1/branch_management.py | 5 + .../routes => api/v1}/commit_management.py | 22 +- .../{v1/routes => api/v1}/data_management.py | 24 +- .../{v1/routes => api/v1}/owner_management.py | 10 +- .../{v1/routes => api/v1}/table_management.py | 10 +- tarchia/history/__init__.py | 1 - tarchia/{ => interfaces}/catalog/__init__.py | 6 +- .../{ => interfaces}/catalog/dev_catalog.py | 4 +- .../{ => interfaces}/catalog/gcs_firestore.py | 2 +- .../{ => interfaces}/catalog/provider_base.py | 0 tarchia/{ => interfaces}/storage/__init__.py | 2 +- .../storage/google_cloud_storage.py | 2 +- .../{ => interfaces}/storage/local_storage.py | 0 .../storage/storage_provider.py | 0 tarchia/{history => metadata}/history.py | 16 +- tarchia/{ => metadata}/manifests/__init__.py | 6 +- tarchia/{ => metadata}/manifests/pruning.py | 2 +- tarchia/{ => metadata}/schemas.py | 0 tarchia/models/__init__.py | 4 +- tarchia/models/eventable.py | 2 +- tarchia/models/history_models.py | 93 ++++ .../models.py => models/manifest_models.py} | 17 +- tarchia/models/metadata_models.py | 57 -- tarchia/models/request_models.py | 6 +- tarchia/scanners/expectations/evaluate.py | 72 +++ tarchia/scanners/expectations/rules.py | 514 ++++++++++++++++++ tarchia/scanners/secrets/fides.py | 71 +++ tarchia/scanners/secrets/rules.yara | 70 +++ tarchia/utils/__init__.py | 2 +- tarchia/utils/catalogs.py | 2 +- tarchia/{ => utils}/config.py | 0 tarchia/{ => utils}/constants.py | 0 tarchia/v1/routes/branch_management.py | 7 - tests/utils/test_is_identifier.py | 2 +- 41 files changed, 900 insertions(+), 150 deletions(-) rename tarchia/{v1 => api}/__init__.py (100%) rename tarchia/{ => api}/middlewares/__init__.py (100%) rename tarchia/{ => api}/middlewares/audit_middleware.py (100%) rename tarchia/{ => api}/middlewares/authorization_middleware.py (100%) rename tarchia/{v1/routes => api/v1}/__init__.py (100%) create mode 100644 tarchia/api/v1/branch_management.py rename tarchia/{v1/routes => api/v1}/commit_management.py (89%) rename tarchia/{v1/routes => api/v1}/data_management.py (94%) rename tarchia/{v1/routes => api/v1}/owner_management.py (93%) rename tarchia/{v1/routes => api/v1}/table_management.py (97%) delete mode 100644 tarchia/history/__init__.py rename tarchia/{ => interfaces}/catalog/__init__.py (69%) rename tarchia/{ => interfaces}/catalog/dev_catalog.py (95%) rename tarchia/{ => interfaces}/catalog/gcs_firestore.py (99%) rename tarchia/{ => interfaces}/catalog/provider_base.py (100%) rename tarchia/{ => interfaces}/storage/__init__.py (98%) rename tarchia/{ => interfaces}/storage/google_cloud_storage.py (97%) rename tarchia/{ => interfaces}/storage/local_storage.py (100%) rename tarchia/{ => interfaces}/storage/storage_provider.py (100%) rename tarchia/{history => metadata}/history.py (94%) rename tarchia/{ => metadata}/manifests/__init__.py (96%) rename tarchia/{ => metadata}/manifests/pruning.py (97%) rename tarchia/{ => metadata}/schemas.py (100%) create mode 100644 tarchia/models/history_models.py rename tarchia/{manifests/models.py => models/manifest_models.py} (85%) create mode 100644 tarchia/scanners/expectations/evaluate.py create mode 100644 tarchia/scanners/expectations/rules.py create mode 100644 tarchia/scanners/secrets/fides.py create mode 100644 tarchia/scanners/secrets/rules.yara rename tarchia/{ => utils}/config.py (100%) rename tarchia/{ => utils}/constants.py (100%) delete mode 100644 tarchia/v1/routes/branch_management.py diff --git a/README.md b/README.md index dea9d90..ee88433 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,16 @@ # Tarchia + + Tarchia is an Active Data Catalog. Tarchia actively manages and catalogs data in real-time. Unlike traditional catalogs that serve merely as passive records, our Active Data Catalog is essential to the operational workflow, ensuring meta data is always up-to-date and readily accessible for system processes. diff --git a/main.py b/main.py index 3fb5a37..d685ff6 100644 --- a/main.py +++ b/main.py @@ -37,13 +37,13 @@ from uvicorn import run from tarchia import __version__ -from tarchia.middlewares import AuditMiddleware -from tarchia.middlewares import AuthorizationMiddleware -from tarchia.v1 import routes as v1_routes +from tarchia.api.middlewares import AuditMiddleware +from tarchia.api.middlewares import AuthorizationMiddleware +from tarchia.api.v1 import v1_router application = FastAPI(title="Tarchia Metastore", version=__version__) -application.include_router(v1_routes.v1_router) +application.include_router(v1_router) application.add_middleware(AuthorizationMiddleware) application.add_middleware(AuditMiddleware) diff --git a/tarchia/v1/__init__.py b/tarchia/api/__init__.py similarity index 100% rename from tarchia/v1/__init__.py rename to tarchia/api/__init__.py diff --git a/tarchia/middlewares/__init__.py b/tarchia/api/middlewares/__init__.py similarity index 100% rename from tarchia/middlewares/__init__.py rename to tarchia/api/middlewares/__init__.py diff --git a/tarchia/middlewares/audit_middleware.py b/tarchia/api/middlewares/audit_middleware.py similarity index 100% rename from tarchia/middlewares/audit_middleware.py rename to tarchia/api/middlewares/audit_middleware.py diff --git a/tarchia/middlewares/authorization_middleware.py b/tarchia/api/middlewares/authorization_middleware.py similarity index 100% rename from tarchia/middlewares/authorization_middleware.py rename to tarchia/api/middlewares/authorization_middleware.py diff --git a/tarchia/v1/routes/__init__.py b/tarchia/api/v1/__init__.py similarity index 100% rename from tarchia/v1/routes/__init__.py rename to tarchia/api/v1/__init__.py diff --git a/tarchia/api/v1/branch_management.py b/tarchia/api/v1/branch_management.py new file mode 100644 index 0000000..d6125f1 --- /dev/null +++ b/tarchia/api/v1/branch_management.py @@ -0,0 +1,5 @@ +""" +[GET] /repos/{owner}/{repo}/branch +[GET] /repos/{owner}/{repo}/branch/{branch}/head +[POST] /repos/{owner}/{repo}/merges => {"base", "head", "message"} +""" diff --git a/tarchia/v1/routes/commit_management.py b/tarchia/api/v1/commit_management.py similarity index 89% rename from tarchia/v1/routes/commit_management.py rename to tarchia/api/v1/commit_management.py index da92f0d..bfce3d3 100644 --- a/tarchia/v1/routes/commit_management.py +++ b/tarchia/api/v1/commit_management.py @@ -17,12 +17,12 @@ from fastapi import Request from fastapi.responses import ORJSONResponse -from tarchia.constants import COMMITS_ROOT -from tarchia.constants import HISTORY_ROOT -from tarchia.constants import IDENTIFIER_REG_EX -from tarchia.constants import MAIN_BRANCH from tarchia.exceptions import CommitNotFoundError from tarchia.models import Schema +from tarchia.utils.constants import COMMITS_ROOT +from tarchia.utils.constants import HISTORY_ROOT +from tarchia.utils.constants import IDENTIFIER_REG_EX +from tarchia.utils.constants import MAIN_BRANCH router = APIRouter() @@ -32,12 +32,12 @@ async def get_table_commit( request: Request, owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), - commit_sha: Union[str, Literal["latest"]] = Path(description="The commit to retrieve."), + commit_sha: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), filters: Optional[str] = Query(None, description="Filters to push to manifest reader"), ): - from tarchia.manifests import get_manifest - from tarchia.manifests.pruning import parse_filters - from tarchia.storage import storage_factory + from tarchia.interfaces.storage import storage_factory + from tarchia.metadata.manifests import get_manifest + from tarchia.metadata.manifests.pruning import parse_filters from tarchia.utils import build_root from tarchia.utils.catalogs import identify_table @@ -46,7 +46,7 @@ async def get_table_commit( # read the data from the catalog for this table catalog_entry = identify_table(owner, table) table_id = catalog_entry.table_id - if commit_sha == "latest": + if commit_sha == "head": commit_sha = catalog_entry.current_commit_sha commit_root = build_root(COMMITS_ROOT, owner=owner, table_id=table_id) @@ -91,8 +91,8 @@ async def get_list_of_table_commits( after: datetime.datetime = Query(None, description="Filter commits"), page_size: int = Query(100, description="Maximum items to show"), ): - from tarchia.history import HistoryTree - from tarchia.storage import storage_factory + from tarchia.interfaces.storage import storage_factory + from tarchia.metadata.history import HistoryTree from tarchia.utils import build_root from tarchia.utils.catalogs import identify_table diff --git a/tarchia/v1/routes/data_management.py b/tarchia/api/v1/data_management.py similarity index 94% rename from tarchia/v1/routes/data_management.py rename to tarchia/api/v1/data_management.py index cfa72b0..e83ba03 100644 --- a/tarchia/v1/routes/data_management.py +++ b/tarchia/api/v1/data_management.py @@ -8,16 +8,16 @@ from fastapi import HTTPException from fastapi import Request -from tarchia import config -from tarchia.constants import COMMITS_ROOT -from tarchia.constants import HISTORY_ROOT -from tarchia.constants import MAIN_BRANCH -from tarchia.constants import MANIFEST_ROOT from tarchia.exceptions import TransactionError from tarchia.models import CommitRequest from tarchia.models import StageFilesRequest from tarchia.models import TableRequest from tarchia.models import Transaction +from tarchia.utils import config +from tarchia.utils.constants import COMMITS_ROOT +from tarchia.utils.constants import HISTORY_ROOT +from tarchia.utils.constants import MAIN_BRANCH +from tarchia.utils.constants import MANIFEST_ROOT router = APIRouter() @@ -89,7 +89,7 @@ def load_old_commit(storage_provider, commit_root, parent_commit): def build_new_manifest(old_manifest, transaction, schema): - from tarchia.manifests import build_manifest_entry + from tarchia.metadata.manifests import build_manifest_entry existing_entries = {e.file_path for e in old_manifest} new_entries = [ @@ -128,7 +128,7 @@ def xor_hex_strings(hex_strings: List[str]) -> str: @router.post("/transactions/start") async def start_transaction(table: TableRequest): - from tarchia.storage import storage_factory + from tarchia.interfaces.storage import storage_factory from tarchia.utils import build_root from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_table @@ -174,12 +174,12 @@ async def commit_transaction(request: Request, commit_request: CommitRequest): Returns: dict: Result of the transaction commit. """ - from tarchia.catalog import catalog_factory - from tarchia.history import HistoryTree - from tarchia.manifests import get_manifest - from tarchia.manifests import write_manifest + from tarchia.interfaces.catalog import catalog_factory + from tarchia.interfaces.storage import storage_factory + from tarchia.metadata.history import HistoryTree + from tarchia.metadata.manifests import get_manifest + from tarchia.metadata.manifests import write_manifest from tarchia.models import Commit - from tarchia.storage import storage_factory from tarchia.utils import build_root from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_table diff --git a/tarchia/v1/routes/owner_management.py b/tarchia/api/v1/owner_management.py similarity index 93% rename from tarchia/v1/routes/owner_management.py rename to tarchia/api/v1/owner_management.py index da47092..40a7d9c 100644 --- a/tarchia/v1/routes/owner_management.py +++ b/tarchia/api/v1/owner_management.py @@ -22,12 +22,12 @@ from fastapi import Request from fastapi.responses import ORJSONResponse -from tarchia.config import METADATA_ROOT -from tarchia.constants import IDENTIFIER_REG_EX from tarchia.exceptions import AlreadyExistsError from tarchia.models import CreateOwnerRequest from tarchia.models import OwnerEntry from tarchia.models import UpdateValueRequest +from tarchia.utils.config import METADATA_ROOT +from tarchia.utils.constants import IDENTIFIER_REG_EX router = APIRouter() @@ -44,7 +44,7 @@ async def create_owner(request: CreateOwnerRequest): Returns: JSON response with a message and owner name. """ - from tarchia.catalog import catalog_factory + from tarchia.interfaces.catalog import catalog_factory from tarchia.utils import generate_uuid catalog_provider = catalog_factory() @@ -105,7 +105,7 @@ async def update_owner(owner: str, attribute: str, request: UpdateValueRequest): Returns: JSON response with a message, owner name, and updated attribute. """ - from tarchia.catalog import catalog_factory + from tarchia.interfaces.catalog import catalog_factory from tarchia.utils.catalogs import identify_owner if attribute not in {"steward"}: @@ -131,7 +131,7 @@ async def delete_owner(owner: str): Returns: JSON response with a message and owner name. """ - from tarchia.catalog import catalog_factory + from tarchia.interfaces.catalog import catalog_factory from tarchia.utils.catalogs import identify_owner entry = identify_owner(owner) diff --git a/tarchia/v1/routes/table_management.py b/tarchia/api/v1/table_management.py similarity index 97% rename from tarchia/v1/routes/table_management.py rename to tarchia/api/v1/table_management.py index 370c052..5721188 100644 --- a/tarchia/v1/routes/table_management.py +++ b/tarchia/api/v1/table_management.py @@ -5,16 +5,16 @@ from fastapi import Request from fastapi.responses import ORJSONResponse -from tarchia.catalog import catalog_factory -from tarchia.config import METADATA_ROOT -from tarchia.constants import IDENTIFIER_REG_EX from tarchia.exceptions import AlreadyExistsError +from tarchia.interfaces.catalog import catalog_factory +from tarchia.interfaces.storage import storage_factory from tarchia.models import CreateTableRequest from tarchia.models import TableCatalogEntry from tarchia.models import UpdateMetadataRequest from tarchia.models import UpdateSchemaRequest from tarchia.models import UpdateValueRequest -from tarchia.storage import storage_factory +from tarchia.utils.config import METADATA_ROOT +from tarchia.utils.constants import IDENTIFIER_REG_EX router = APIRouter() catalog_provider = catalog_factory() @@ -221,7 +221,7 @@ async def update_schema( owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), ): - from tarchia.schemas import validate_schema_update + from tarchia.metadata.schemas import validate_schema_update from tarchia.utils.catalogs import identify_table # is the new schema valid diff --git a/tarchia/history/__init__.py b/tarchia/history/__init__.py deleted file mode 100644 index f606a79..0000000 --- a/tarchia/history/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .history import HistoryTree diff --git a/tarchia/catalog/__init__.py b/tarchia/interfaces/catalog/__init__.py similarity index 69% rename from tarchia/catalog/__init__.py rename to tarchia/interfaces/catalog/__init__.py index d43259a..0ed0ab0 100644 --- a/tarchia/catalog/__init__.py +++ b/tarchia/interfaces/catalog/__init__.py @@ -1,14 +1,14 @@ -from tarchia import config from tarchia.exceptions import InvalidConfigurationError +from tarchia.utils import config def catalog_factory(): # pragma: no cover if config.CATALOG_PROVIDER is None or config.CATALOG_PROVIDER.upper() == "DEVELOPMENT": - from tarchia.catalog.dev_catalog import DevelopmentCatalogProvider + from tarchia.interfaces.catalog.dev_catalog import DevelopmentCatalogProvider return DevelopmentCatalogProvider(config.CATALOG_NAME or "catalog.json") if config.CATALOG_PROVIDER.upper() == "FIRESTORE": - from tarchia.catalog.gcs_firestore import FirestoreCatalogProvider + from tarchia.interfaces.catalog.gcs_firestore import FirestoreCatalogProvider return FirestoreCatalogProvider(config.CATALOG_NAME) raise InvalidConfigurationError(setting="CATALOG_PROVIDER") diff --git a/tarchia/catalog/dev_catalog.py b/tarchia/interfaces/catalog/dev_catalog.py similarity index 95% rename from tarchia/catalog/dev_catalog.py rename to tarchia/interfaces/catalog/dev_catalog.py index 91ac1d2..7318289 100644 --- a/tarchia/catalog/dev_catalog.py +++ b/tarchia/interfaces/catalog/dev_catalog.py @@ -6,8 +6,8 @@ from typing import List from typing import Optional -from tarchia.catalog.provider_base import CatalogProvider from tarchia.exceptions import AmbiguousTableError +from tarchia.interfaces.catalog.provider_base import CatalogProvider from tarchia.models import OwnerEntry from tarchia.models import TableCatalogEntry @@ -23,7 +23,7 @@ def __init__(self, db_path: str = None): from tarchia.utils.doc_store import DocumentStore if not db_path: - from tarchia.config import CATALOG_NAME + from tarchia.utils.config import CATALOG_NAME db_path = CATALOG_NAME diff --git a/tarchia/catalog/gcs_firestore.py b/tarchia/interfaces/catalog/gcs_firestore.py similarity index 99% rename from tarchia/catalog/gcs_firestore.py rename to tarchia/interfaces/catalog/gcs_firestore.py index 6d5d152..14f94c5 100644 --- a/tarchia/catalog/gcs_firestore.py +++ b/tarchia/interfaces/catalog/gcs_firestore.py @@ -5,13 +5,13 @@ from typing import List -from tarchia import config from tarchia.catalog.provider_base import CatalogProvider from tarchia.exceptions import AmbiguousTableError from tarchia.exceptions import MissingDependencyError from tarchia.exceptions import UnmetRequirementError from tarchia.models import OwnerEntry from tarchia.models import TableCatalogEntry +from tarchia.utils import config GCP_PROJECT_ID = config.GCP_PROJECT_ID diff --git a/tarchia/catalog/provider_base.py b/tarchia/interfaces/catalog/provider_base.py similarity index 100% rename from tarchia/catalog/provider_base.py rename to tarchia/interfaces/catalog/provider_base.py diff --git a/tarchia/storage/__init__.py b/tarchia/interfaces/storage/__init__.py similarity index 98% rename from tarchia/storage/__init__.py rename to tarchia/interfaces/storage/__init__.py index 26984c5..5364cb1 100644 --- a/tarchia/storage/__init__.py +++ b/tarchia/interfaces/storage/__init__.py @@ -1,7 +1,7 @@ from typing import Optional -from tarchia import config from tarchia.exceptions import InvalidConfigurationError +from tarchia.utils import config from .storage_provider import StorageProvider diff --git a/tarchia/storage/google_cloud_storage.py b/tarchia/interfaces/storage/google_cloud_storage.py similarity index 97% rename from tarchia/storage/google_cloud_storage.py rename to tarchia/interfaces/storage/google_cloud_storage.py index 9d73ee7..158a6bb 100644 --- a/tarchia/storage/google_cloud_storage.py +++ b/tarchia/interfaces/storage/google_cloud_storage.py @@ -1,6 +1,6 @@ import os -from tarchia.config import BUCKET_NAME +from tarchia.utils.config import BUCKET_NAME from .storage_provider import StorageProvider diff --git a/tarchia/storage/local_storage.py b/tarchia/interfaces/storage/local_storage.py similarity index 100% rename from tarchia/storage/local_storage.py rename to tarchia/interfaces/storage/local_storage.py diff --git a/tarchia/storage/storage_provider.py b/tarchia/interfaces/storage/storage_provider.py similarity index 100% rename from tarchia/storage/storage_provider.py rename to tarchia/interfaces/storage/storage_provider.py diff --git a/tarchia/history/history.py b/tarchia/metadata/history.py similarity index 94% rename from tarchia/history/history.py rename to tarchia/metadata/history.py index 5e8264f..2806aea 100644 --- a/tarchia/history/history.py +++ b/tarchia/metadata/history.py @@ -8,26 +8,12 @@ from fastavro import reader from fastavro import writer -from tarchia.constants import MAIN_BRANCH from tarchia.models import HistoryEntry +from tarchia.utils.constants import MAIN_BRANCH sys.path.insert(0, os.path.join(sys.path[0], "../..")) -HISTORY_SCHEMA = { - "type": "record", - "name": "Commit", - "fields": [ - {"name": "sha", "type": "string"}, - {"name": "branch", "type": "string"}, - {"name": "message", "type": "string"}, - {"name": "user", "type": "string"}, - {"name": "timestamp", "type": "int"}, - {"name": "parent_sha", "type": ["null", "string"], "default": None}, - ], -} - - class HistoryTree: def __init__(self, trunk_branch_name: str = MAIN_BRANCH): self.trunk_branch_name = trunk_branch_name diff --git a/tarchia/manifests/__init__.py b/tarchia/metadata/manifests/__init__.py similarity index 96% rename from tarchia/manifests/__init__.py rename to tarchia/metadata/manifests/__init__.py index 8925865..1a8ec2f 100644 --- a/tarchia/manifests/__init__.py +++ b/tarchia/metadata/manifests/__init__.py @@ -3,11 +3,11 @@ from typing import Tuple from tarchia.exceptions import DataError -from tarchia.manifests.models import EntryType -from tarchia.manifests.models import ManifestEntry from tarchia.manifests.pruning import prune from tarchia.models import Column from tarchia.models import Schema +from tarchia.models.manifest_models import EntryType +from tarchia.models.manifest_models import ManifestEntry from tarchia.storage import StorageProvider from tarchia.storage import storage_factory @@ -68,7 +68,7 @@ def write_manifest(location: str, storage_provider: StorageProvider, entries: Li import fastavro - from .models import MANIFEST_SCHEMA + from ..models.manifest_models import MANIFEST_SCHEMA stream = BytesIO() diff --git a/tarchia/manifests/pruning.py b/tarchia/metadata/manifests/pruning.py similarity index 97% rename from tarchia/manifests/pruning.py rename to tarchia/metadata/manifests/pruning.py index 7cf2798..152207d 100644 --- a/tarchia/manifests/pruning.py +++ b/tarchia/metadata/manifests/pruning.py @@ -5,7 +5,7 @@ from tarchia.models import Schema from tarchia.utils.to_int import to_int -from .models import ManifestEntry +from ..models.manifest_models import ManifestEntry def parse_value(field: str, value: Any, schema: Schema) -> int: diff --git a/tarchia/schemas.py b/tarchia/metadata/schemas.py similarity index 100% rename from tarchia/schemas.py rename to tarchia/metadata/schemas.py diff --git a/tarchia/models/__init__.py b/tarchia/models/__init__.py index 4c78d8b..9e6f2d3 100644 --- a/tarchia/models/__init__.py +++ b/tarchia/models/__init__.py @@ -1,8 +1,8 @@ +from .history_models import Commit +from .history_models import HistoryEntry from .metadata_models import Column -from .metadata_models import Commit from .metadata_models import DatasetPermissions from .metadata_models import EncryptionDetails -from .metadata_models import HistoryEntry from .metadata_models import OrsoTypes from .metadata_models import OwnerEntry from .metadata_models import OwnerType diff --git a/tarchia/models/eventable.py b/tarchia/models/eventable.py index bbfcbd9..5133442 100644 --- a/tarchia/models/eventable.py +++ b/tarchia/models/eventable.py @@ -108,7 +108,7 @@ def notify_subscribers(self, url: str, data: dict): max_tries=3, backoff_seconds=5, exponential_backoff=True, - max_backoff=8, + max_backoff=60, retry_exceptions=(ConnectionError, Timeout), ) def _send_request_with_retries(self, url: str, data: dict): diff --git a/tarchia/models/history_models.py b/tarchia/models/history_models.py new file mode 100644 index 0000000..aa4c0ec --- /dev/null +++ b/tarchia/models/history_models.py @@ -0,0 +1,93 @@ +from typing import List +from typing import Optional + +from pydantic import Field + +from .metadata_models import Schema +from .tarchia_base import TarchiaBaseModel + +HISTORY_SCHEMA = { + "type": "record", + "name": "Commit", + "fields": [ + {"name": "sha", "type": "string"}, + {"name": "branch", "type": "string"}, + {"name": "message", "type": "string"}, + {"name": "user", "type": "string"}, + {"name": "timestamp", "type": "int"}, + {"name": "parent_sha", "type": ["null", "string"], "default": None}, + ], +} + + +class EncryptionDetails(TarchiaBaseModel): + """ + Model representing encryption details. + + Attributes: + algorithm (str): The encryption algorithm used. + key_id (str): The identifier for the encryption key. + fields (List[str]): The fields to be encrypted. + """ + + algorithm: str + key_id: str + fields: List[str] + + +class Commit(TarchiaBaseModel): + """ + Model representing a commit. + """ + + data_hash: str + user: str + message: str + branch: str + parent_commit_sha: Optional[str] + last_updated_ms: int + manifest_path: Optional[str] + table_schema: Schema + encryption_details: Optional[EncryptionDetails] + commit_sha: Optional[str] = None + + added_files: Optional[List[str]] = Field(default_factory=list) + removed_files: Optional[List[str]] = Field(default_factory=list) + + def calculate_hash(self) -> str: + import hashlib + + hasher = hashlib.sha256() + hasher.update(self.data_hash.encode()) + hasher.update(self.message.encode()) + hasher.update(self.user.encode()) + hasher.update(self.branch.encode()) + hasher.update(str(self.last_updated_ms).encode()) + if self.parent_commit_sha: + hasher.update(self.parent_commit_sha.encode()) + return hasher.hexdigest() + + def __init__(self, **data): + super().__init__(**data) + self.commit_sha = self.calculate_hash() + + @property + def history_entry(self): + """Slimemed record for Merkle Tree""" + return HistoryEntry( + sha=self.commit_sha, + branch=self.branch, + message=self.message, + user=self.user, + timestamp=self.last_updated_ms, + parent_sha=self.parent_commit_sha, + ) + + +class HistoryEntry(TarchiaBaseModel): + sha: str + branch: str + message: str + user: str + timestamp: int + parent_sha: Optional[str] = None diff --git a/tarchia/manifests/models.py b/tarchia/models/manifest_models.py similarity index 85% rename from tarchia/manifests/models.py rename to tarchia/models/manifest_models.py index 6e98440..6ecb8a6 100644 --- a/tarchia/manifests/models.py +++ b/tarchia/models/manifest_models.py @@ -47,20 +47,18 @@ class ManifestEntry(TarchiaBaseModel): Attributes: file_path (str): The path to the file. - file_format (str): The format of the file (e.g., 'parquet', 'json'). - file_type (EntryType): The type of the entry (e.g., 'type1', 'type2'). - record_count (Optional[int]): The number of records in the file. Defaults to None. - file_size (Optional[int]): The size of the file in bytes. Defaults to None. + file_type (EntryType): The type of the entry (e.g., 'Data', 'Manifest'). + record_count (Optional[int]): The number of records in the file. Defaults to -1. + file_size (Optional[int]): The size of the file in bytes. Defaults to -1. sha256_checksum (Optional[str]): The SHA-256 checksum of the file. Defaults to None. lower_bounds (Dict[str, int]): A dictionary containing the lower bounds for data values. upper_bounds (Dict[str, int]): A dictionary containing the upper bounds for data values. """ file_path: str - file_format: str file_type: EntryType - record_count: Optional[int] = None - file_size: Optional[int] = None + record_count: int = -1 + file_size: int = -1 sha256_checksum: Optional[str] = None lower_bounds: Dict[str, int] = Field(default_factory=dict) upper_bounds: Dict[str, int] = Field(default_factory=dict) @@ -72,13 +70,12 @@ class ManifestEntry(TarchiaBaseModel): "name": "ManifestEntry", "fields": [ {"name": "file_path", "type": "string"}, - {"name": "file_format", "type": "string"}, { "name": "file_type", "type": {"type": "enum", "name": "EntryType", "symbols": ["Manifest", "Data"]}, }, - {"name": "record_count", "type": ["null", "int"], "default": None}, - {"name": "file_size", "type": ["null", "int"], "default": None}, + {"name": "record_count", "type": "int", "default": -1}, + {"name": "file_size", "type": "int", "default": -1}, {"name": "sha256_checksum", "type": ["null", "string"], "default": None}, {"name": "lower_bounds", "type": {"type": "map", "values": "long"}}, {"name": "upper_bounds", "type": {"type": "map", "values": "long"}}, diff --git a/tarchia/models/metadata_models.py b/tarchia/models/metadata_models.py index 68e6988..bf3dd3b 100644 --- a/tarchia/models/metadata_models.py +++ b/tarchia/models/metadata_models.py @@ -121,61 +121,6 @@ class Schema(TarchiaBaseModel): columns: List[Column] -class HistoryEntry(TarchiaBaseModel): - sha: str - branch: str - message: str - user: str - timestamp: int - parent_sha: Optional[str] = None - - -class Commit(TarchiaBaseModel): - """ - Model representing a commit. - """ - - data_hash: str - user: str - message: str - branch: str - parent_commit_sha: Optional[str] - last_updated_ms: int - manifest_path: Optional[str] - table_schema: Schema - encryption_details: Optional[EncryptionDetails] - commit_sha: Optional[str] = None - - def calculate_hash(self) -> str: - import hashlib - - hasher = hashlib.sha256() - hasher.update(self.data_hash.encode()) - hasher.update(self.message.encode()) - hasher.update(self.user.encode()) - hasher.update(self.branch.encode()) - hasher.update(str(self.last_updated_ms).encode()) - if self.parent_commit_sha: - hasher.update(self.parent_commit_sha.encode()) - return hasher.hexdigest() - - def __init__(self, **data): - super().__init__(**data) - self.commit_sha = self.calculate_hash() - - @property - def history_entry(self): - """Slimemed record for Merkle Tree""" - return HistoryEntry( - sha=self.commit_sha, - branch=self.branch, - message=self.message, - user=self.user, - timestamp=self.last_updated_ms, - parent_sha=self.parent_commit_sha, - ) - - class TableCatalogEntry(TarchiaBaseModel, Eventable): """ The Catalog entry for a table. @@ -197,10 +142,8 @@ class EventTypes(Enum): last_updated_ms: int permissions: List[DatasetPermissions] visibility: TableVisibility - current_schema: Schema current_commit_sha: Optional[str] = None current_history: Optional[str] = None - encryption_details: Optional[EncryptionDetails] = None format_version: int = Field(default=1) disposition: TableDisposition = Field(default=TableDisposition.SNAPSHOT) metadata: dict = Field(default_factory=dict) diff --git a/tarchia/models/request_models.py b/tarchia/models/request_models.py index f026d1d..3db907f 100644 --- a/tarchia/models/request_models.py +++ b/tarchia/models/request_models.py @@ -10,10 +10,8 @@ from .metadata_models import Column from .metadata_models import DatasetPermissions -from .metadata_models import EncryptionDetails from .metadata_models import OwnerType from .metadata_models import RolePermission -from .metadata_models import Schema from .metadata_models import TableDisposition from .metadata_models import TableVisibility from .tarchia_base import TarchiaBaseModel @@ -54,7 +52,6 @@ class CreateTableRequest(TarchiaBaseModel): name: str steward: str location: Optional[str] - table_schema: Schema visibility: TableVisibility = TableVisibility.PRIVATE partitioning: Optional[List[str]] = ["year", "month", "day"] disposition: TableDisposition = TableDisposition.SNAPSHOT @@ -62,10 +59,9 @@ class CreateTableRequest(TarchiaBaseModel): DatasetPermissions(role="*", permission=RolePermission.READ) ] metadata: Dict[str, Any] = Field(default_factory=dict) - encryption_details: Optional[EncryptionDetails] = None @field_validator("name") - def validate_name(cls, name): + def validate_name(self, name): """ Validate the table name to ensure it matches the required pattern. diff --git a/tarchia/scanners/expectations/evaluate.py b/tarchia/scanners/expectations/evaluate.py new file mode 100644 index 0000000..2bd9d6b --- /dev/null +++ b/tarchia/scanners/expectations/evaluate.py @@ -0,0 +1,72 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from data_expectations import Expectations +from data_expectations.errors import ExpectationNotMetError +from data_expectations.errors import ExpectationNotUnderstoodError + +ALL_EXPECTATIONS = Expectations.all_expectations() + + +def evaluate_record( + expectations: Expectations, record: dict, suppress_errors: bool = False +) -> bool: + """ + Test a single record against a defined set of expectations. + + Args: + expectations: The Expectations instance. + record: The dictionary record to be tested. + all_expectations: The dictionary of all available expectations. + suppress_errors: Whether to suppress expectation errors and return False instead. + + Returns: + True if all expectations are met, False otherwise. + """ + for expectation_definition in expectations.set_of_expectations: + # get the name of the expectation + expectation = expectation_definition.expectation + + if expectation not in ALL_EXPECTATIONS: + raise ExpectationNotUnderstoodError(expectation=expectation) + + base_config = { + "row": record, + "column": expectation_definition.column, + **expectation_definition.config, + } + + if not ALL_EXPECTATIONS[expectation](**base_config): + if not suppress_errors: + raise ExpectationNotMetError(expectation, record) + return False # data failed to meet expectation + + return True + + +def evaluate_list( + expectations: Expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False +) -> bool: + """ + Evaluate a set of records against a defined set of Expectations. + + Args: + expectations: The Expectations instance. + dictset: The iterable set of dictionary records to be tested. + suppress_errors: Whether to suppress expectation errors and return False for the entire set. + + Returns: + True if all records meet all Expectations, False otherwise. + """ + return all(evaluate_record(expectations, record, suppress_errors) for record in dictset) diff --git a/tarchia/scanners/expectations/rules.py b/tarchia/scanners/expectations/rules.py new file mode 100644 index 0000000..e23623d --- /dev/null +++ b/tarchia/scanners/expectations/rules.py @@ -0,0 +1,514 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Inspired by the Great Expectations library. + +Rather than testing for conformity through defining a schema, expectations are a set +of assertions we can apply to our data. + +Whilst a schema-based approach isn't exactly procedural, expectations are a more +declarative way to define valid data. + +These assertions can also define a schema (we can expect a set of columns, each with +an expected type), but they also allow us to have more complex assertions, such as +the values in a set of columns should add to 100, or the values in a column are +increasing. + +This is designed to be applied to streaming data as each record passes through a point +in a flow - as such it is not intended to test an entire dataset at once to test its +validity, and some assertions are impractical - for example an expectation of the mean +of all of the values in a table. + +- if data doesn't match, I'm not cross, I'm just disappointed. +""" + +import json +import re +from dataclasses import is_dataclass +from inspect import getmembers +from typing import Any +from typing import Dict +from typing import Iterable +from typing import List +from typing import Union + +from data_expectations.internals.models import Expectation +from data_expectations.internals.text import sql_like_to_regex + +GLOBAL_TRACKER: Dict[str, Any] = {} + + +def track_previous(func): + def wrapper(*args, **kwargs): + column = kwargs.get("column") + key = f"{func.__name__}/{str(column)}" + if "previous_value" in kwargs: + previous_value = kwargs.pop("previous_value") + else: + previous_value = GLOBAL_TRACKER.get(key) + result = func(previous_value=previous_value, *args, **kwargs) + GLOBAL_TRACKER[key] = kwargs.get("row", {}).get(column) or previous_value + return result + + return wrapper + + +class Expectations: + def __init__(self, set_of_expectations: Iterable[Union[str, dict, Expectation]]): + self.set_of_expectations: List[Expectation] = [] + for exp in set_of_expectations: + if isinstance(exp, str): # Parse JSON string + exp = json.loads(exp) + + if isinstance(exp, dict): # Convert dict to Expectation + self.set_of_expectations.append(Expectation.load(exp)) + elif is_dataclass(exp) and isinstance(exp, Expectation): + self.set_of_expectations.append(exp) + + @classmethod + def all_expectations(cls): + """ + Programmatically get the list of expectations and build them into a dictionary. + We then use this dictionary to look up the methods to test the expectations in + the set of expectations for a dataset. + """ + expectations = {} + for handle, member in getmembers(cls): + if callable(member) and handle.startswith("expect_"): + expectations[handle] = member + return expectations + + @staticmethod + def reset(): + global GLOBAL_TRACKER + GLOBAL_TRACKER = {} + + ################################################################################### + # COLUMN EXPECTATIONS + ################################################################################### + + @staticmethod + def expect_column_to_exist( + *, + row: dict, + column: str, + **kwargs, + ): + """ + Confirms that a specified column exists in the record. + + Parameters: + row: dict + The record to be checked. + column: str + Name of the column to check for existence. + + Returns: bool + True if column exists, False otherwise. + """ + if isinstance(row, dict): + return column in row + return False + + @staticmethod + def expect_column_values_to_not_be_null( + *, + row: dict, + column: str, + **kwargs, + ): + """ + Confirms that the value in a column is not null. Non-existent values are considered null. + + Parameters: + row: dict + The record containing the column. + column: str + The column's name whose value should not be null. + + Returns: bool + True if the value in the column is not null, False otherwise. + """ + return row.get(column) is not None + + @staticmethod + def expect_column_values_to_be_of_type( + *, + row: dict, + column: str, + expected_type, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column is of the expected type. + + Parameters: + row: dict + The record to be checked. + column: str + The column's name to validate the type of its value. + expected_type: + Expected type of the column value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the type matches or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return type(value).__name__ == expected_type + return ignore_nulls + + @staticmethod + def expect_column_values_to_be_in_type_list( + *, + row: dict, + column: str, + type_list: Iterable, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the type of value in a specific column is one of the specified types. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate the type of its value. + type_list: Iterable + List of expected types for the column value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the type is in the type list or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return type(value).__name__ in type_list + return ignore_nulls + + @staticmethod + def expect_column_values_to_be_between( + *, + row: dict, + column: str, + minimum, + maximum, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column is between two values. + + Parameters: + row: dict + The record to check. + column: str + The column's name to validate its value. + minimum: + Lower bound of the value. + maximum: + Upper bound of the value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is between the two bounds or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return value >= minimum and value <= maximum + return ignore_nulls + + @staticmethod + @track_previous + def expect_column_values_to_be_increasing( + *, + row: dict, + column: str, + ignore_nulls: bool = True, + previous_value=None, + **kwargs, + ): + """ + Confirms that the values in a specific column are in an increasing order. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + previous_value: [type] + The value of the column from the previous record. + + Returns: bool + True if the current value is greater than or equal to the previous value or if the value is null and ignore_nulls is True. False otherwise. + """ + value = row.get(column) + if value is not None: + return previous_value is None or previous_value <= value + return ignore_nulls + + @staticmethod + @track_previous + def expect_column_values_to_be_decreasing( + *, + row: dict, + column: str, + ignore_nulls: bool = True, + previous_value=None, + **kwargs, + ): + """ + Confirms that the values in a specific column are in a decreasing order. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + previous_value: [type] + The value of the column from the previous record. + + Returns: bool + True if the current value is less than or equal to the previous value or if the value is null and ignore_nulls is True. False otherwise. + """ + value = row.get(column) + if value is not None: + return previous_value is None or previous_value >= value + return ignore_nulls + + @staticmethod + def expect_column_values_to_be_in_set( + *, + row: dict, + column: str, + symbols: Iterable, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column is within a predefined set. + + Parameters: + row: dict + The record to check. + column: str + The column's name to validate its value. + symbols: Iterable + The set of allowed values for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is in the provided set or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return value in symbols + return ignore_nulls + + @staticmethod + def expect_column_values_to_match_regex( + *, + row: dict, + column: str, + regex: str, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column matches a given regular expression pattern. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + regex: str + The regular expression pattern to match against the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value matches the regex or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return re.compile(regex).match(str(value)) is not None + return ignore_nulls + + @staticmethod + def expect_column_values_to_match_like( + *, + row: dict, + column: str, + like: str, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column matches a given SQL-like pattern. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + like: str + The SQL-like pattern to match against the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value matches the pattern or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return sql_like_to_regex(like).match(str(value)) is not None + return ignore_nulls + + @staticmethod + def expect_column_values_length_to_be( + *, + row: dict, + column: str, + length: int, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the length of the value in a specific column is equal to a specified length. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value length. + length: int + The expected length for the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the length of the value matches the specified length or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + if not hasattr(value, "__len__"): + value = str(value) + return len(value) == length + return ignore_nulls + + @staticmethod + def expect_column_values_length_to_be_between( + *, + row: dict, + column: str, + minimum: int, + maximum: int, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the length of the value in a specific column falls within a specified range. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value length. + minimum: int + The minimum acceptable length for the column's value. + maximum: int + The maximum acceptable length for the column's value. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the length of the value is within the specified range or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + if not hasattr(value, "__len__"): + value = str(value) + return len(value) >= minimum and len(value) <= maximum + return ignore_nulls + + @staticmethod + def expect_column_values_to_be_more_than( + *, + row: dict, + column: str, + threshold, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column is greater than a given threshold. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + threshold: [type] + The minimum acceptable value for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is greater than the threshold or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return value > threshold + return ignore_nulls + + @staticmethod + def expect_column_values_to_be_less_than( + *, + row: dict, + column: str, + threshold, + ignore_nulls: bool = True, + **kwargs, + ): + """ + Confirms that the value in a specific column is less than a given threshold. + + Parameters: + row: dict + The record to validate. + column: str + The column's name to validate its value. + threshold: [type] + The maximum acceptable value for the column. + ignore_nulls: bool + If True, null values will not cause the expectation to fail. + + Returns: bool + True if the value is less than the threshold or if the value is null and ignore_nulls is True, False otherwise. + """ + value = row.get(column) + if value is not None: + return value < threshold + return ignore_nulls diff --git a/tarchia/scanners/secrets/fides.py b/tarchia/scanners/secrets/fides.py new file mode 100644 index 0000000..d57308c --- /dev/null +++ b/tarchia/scanners/secrets/fides.py @@ -0,0 +1,71 @@ +import glob +import os +import sys +from typing import Optional +from urllib.error import URLError +from urllib.request import urlopen + +import yara + +RULE_URL: str = ( + "https://raw.githubusercontent.com/joocer/fides/main/rules/Leaked%20Secrets%20(SECRETS).yar" +) + + +def download_file(url: str) -> Optional[str]: + """ + Downloads a file given a URL and returns its content as a string. + + Parameters: + url: str + URL of the file to download. + + Returns: + Content of the file as a string if successful, otherwise None. + """ + try: + with urlopen(url) as response: + if response.status == 200: + return response.read().decode("utf-8") + except URLError: + return None + return None + + +def main(): + found_secrets = False + rule_file = download_file(RULE_URL) + if rule_file is None: + print("Failed to download rule file.") + sys.exit(1) + + rules = yara.compile(source=rule_file) + + for file_name in glob.iglob("**", recursive=True): + if not os.path.isfile(file_name): + continue + with open(file_name, "rb") as contents: + for line_counter, line in enumerate(contents.readlines()): + if len(line) > 1: + matches = rules.match(data=line) + for match in matches: + description = match.meta["description"] + if description != "Token Appears to be a Random String": + print( + f"\033[0;33m{description:40}\033[0m \033[0;31mFAIL\033[0m {file_name}:{line_counter + 1}" + ) + found_secrets = True + else: + print( + f"\033[0;35m{description:40}\033[0m \033[0;34mWARN\033[0m {file_name}:{line_counter + 1}" + ) + + if found_secrets: + print("\nSecrets Found") + sys.exit(1) + + print("No Secrets Found") + + +if __name__ == "__main__": + main() diff --git a/tarchia/scanners/secrets/rules.yara b/tarchia/scanners/secrets/rules.yara new file mode 100644 index 0000000..b48a54b --- /dev/null +++ b/tarchia/scanners/secrets/rules.yara @@ -0,0 +1,70 @@ +/* + Tests for passwords, hashes and secrets. + + Pattern RegExes mostly from + https://raw.githubusercontent.com/dxa4481/truffleHog/dev/scripts/searchOrg.py +*/ + +import "math" + +rule SECRETS01 : HIGH_ENTROPY_STRING +{ + meta: + author = "Joocer" + description = "Token Appears to be a Random String" + timestamp = "2020-10-27" + version = "0.01" + importance = "medium" + strings: + $token = /[A-Z0-9\=\_\-]{8,64}/ nocase + condition: + math.entropy(@token, !token) > 6 +} + +rule SECRETS02 : SECRETS +{ + meta: + author = "Joocer" + description = "Token Matches Known Secret Format" + timestamp = "2022-01-12" + version = "0.02" + importance = "high" + + strings: + $slack_token = /\b(xox[p|b|o|a]\-[0-9]{12}\-[0-9]{12}\-[0-9]{12}\-[a-z0-9]{32})\b/ + $facebook_oauth = /\bfacebook.{0,30}['\\"\\\\s][0-9a-f]{32}['\\"\\\\s]\b/ nocase + $twitter_oauth = /\btwitter.{0,30}['\\"\\\\s][0-9A-Z]{35,44}['\\"\\\\s]\b/ nocase + $github = /\bgithub.{0,30}['\\"\\\\s][0-9A-Z]{35,40}['\\"\\\\s]\b/ nocase + $github_pat = /\bghp_[0-9A-Z]{36}\b/ nocase + $google_oauth = /\b(\\"client_secret\\":\\"[a-zA-Z0-9-_]{24}\\")\b/ + $AWS_API_key = /\bAKIA[0-9A-Z]{16}\b/ + $heroku_API_key = /\bheroku.{0,30}[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\b/ nocase + $slack_webhook = /\bhttps:\/\/hooks.slack.com\/services\/T[a-zA-Z0-9_]{8}\/B[a-zA-Z0-9_]{8}\/[a-zA-Z0-9_]{24}\b/ + $google_service_account = /\b\\"type\\": \\"service_account\\"\b/ + $password_in_URL = /\b[a-zA-Z]{3,10}:\/\/[^\/\\\\s:@]{3,20}:[^\/\\\\s:@]{3,20}@.{1,100}[\\"'\\\\s]\b/ + $oath_token = /\bya29\.[\w-]+\b/ nocase + $jwt_token = /\beyJ[0-9A-Z_-]{8,}\.eyJ[0-9A-Z_-]{8,}\.[0-9A-Z_-]{16,}\b/ nocase + + condition: + any of them +} + +rule SECRETS03 : KEY_FILES +{ + meta: + author = "Joocer" + description = "Token Matches Known Secret File Marker" + timestamp = "2020-10-27" + version = "0.01" + importance = "high" + + strings: + $RSA_private_key = "-----BEGIN RSA PRIVATE KEY-----" + $OPENSSH_private_key = "-----BEGIN OPENSSH PRIVATE KEY-----" + $DSA_private_key = "-----BEGIN DSA PRIVATE KEY-----" + $EC_private_key = "-----BEGIN EC PRIVATE KEY-----" + $PGP_private_key = "-----BEGIN PGP PRIVATE KEY BLOCK-----" + + condition: + any of them +} \ No newline at end of file diff --git a/tarchia/utils/__init__.py b/tarchia/utils/__init__.py index 293eb8b..e3ccaf5 100644 --- a/tarchia/utils/__init__.py +++ b/tarchia/utils/__init__.py @@ -1,6 +1,6 @@ import uuid -from tarchia.config import METADATA_ROOT +from tarchia.utils.config import METADATA_ROOT def generate_uuid() -> str: diff --git a/tarchia/utils/catalogs.py b/tarchia/utils/catalogs.py index 6a51516..3c6498d 100644 --- a/tarchia/utils/catalogs.py +++ b/tarchia/utils/catalogs.py @@ -1,6 +1,6 @@ -from tarchia.catalog import catalog_factory from tarchia.exceptions import OwnerNotFoundError from tarchia.exceptions import TableNotFoundError +from tarchia.interfaces.catalog import catalog_factory from tarchia.models import OwnerEntry from tarchia.models import TableCatalogEntry diff --git a/tarchia/config.py b/tarchia/utils/config.py similarity index 100% rename from tarchia/config.py rename to tarchia/utils/config.py diff --git a/tarchia/constants.py b/tarchia/utils/constants.py similarity index 100% rename from tarchia/constants.py rename to tarchia/utils/constants.py diff --git a/tarchia/v1/routes/branch_management.py b/tarchia/v1/routes/branch_management.py deleted file mode 100644 index f025f3e..0000000 --- a/tarchia/v1/routes/branch_management.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -[GET] /repos/{owner}/{repo}/branches -[GET] /repos/{owner}/{repo}/branches/{branch} -[GET] /repos/{owner}/{repo}/branches/{branch}/commits -[GET] /repos/{owner}/{repo}/branches/{branch}/commits/{commit_sha} -[POST] /repos/{owner}/{repo}/merges => {"base", "head", "message"} -""" diff --git a/tests/utils/test_is_identifier.py b/tests/utils/test_is_identifier.py index d3d5bdd..8187ad5 100644 --- a/tests/utils/test_is_identifier.py +++ b/tests/utils/test_is_identifier.py @@ -12,7 +12,7 @@ sys.path.insert(1, os.path.join(sys.path[0], "../..")) -from tarchia.constants import IDENTIFIER_REG_EX +from tarchia.utils.constants import IDENTIFIER_REG_EX # fmt:off test_cases = [ From 17b7844a7c7a68ddcd48ac9a3a10d9c266fccd77 Mon Sep 17 00:00:00 2001 From: XB500 Date: Tue, 23 Jul 2024 17:55:02 +0000 Subject: [PATCH 04/16] Tarchia Version 0.0.0-alpha.124 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index 568abc4..70671bc 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 123 +__build__ = 124 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From feb1107dcc3b0b274ca66d6cab98dd34c49ad328 Mon Sep 17 00:00:00 2001 From: joocer Date: Wed, 24 Jul 2024 00:35:41 +0100 Subject: [PATCH 05/16] branching --- README.md | 9 +++ tarchia/{ => actions}/compaction/__init__.py | 0 .../scanners/expectations/evaluate.py | 0 .../scanners/expectations/rules.py | 0 .../{ => actions}/scanners/secrets/fides.py | 0 .../{ => actions}/scanners/secrets/rules.yara | 0 tarchia/api/v1/data_management.py | 4 +- tarchia/api/v1/table_management.py | 64 ++++++++++++++----- tarchia/metadata/history.py | 1 + tarchia/models/__init__.py | 2 +- tarchia/models/eventable.py | 4 +- tarchia/models/history_models.py | 2 +- tarchia/models/metadata_models.py | 6 +- tarchia/models/request_models.py | 24 +++---- tests/endpoints/test_owner_endpoints.py | 2 + tests/endpoints/test_table_endpoints.py | 7 ++ 16 files changed, 88 insertions(+), 37 deletions(-) rename tarchia/{ => actions}/compaction/__init__.py (100%) rename tarchia/{ => actions}/scanners/expectations/evaluate.py (100%) rename tarchia/{ => actions}/scanners/expectations/rules.py (100%) rename tarchia/{ => actions}/scanners/secrets/fides.py (100%) rename tarchia/{ => actions}/scanners/secrets/rules.yara (100%) diff --git a/README.md b/README.md index ee88433..ba1fe20 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ # Tarchia +Data As Code. + +- Data Changes as Commits +- Branching for Development +- Automated Testing +- Merging and Deployment + + + COMMITS(Commit History) + CATALOG --> PERMS(Permissions) CATALOG[(Catalog)] --> |Current| COMMIT(Commit) - CATALOG --> |Current| SCHEMA(Schema) subgraph COMMITS -..-> |Historical| COMMIT - COMMIT --> SCHEMA + COMMIT --> SCHEMA(Schema) + COMMIT --> ENCRYPTION(Encryption) COMMIT --> MAN_LIST(Manifest/List) end MAN_LIST --> DATA(Data Files) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 378e786..1130f8e 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -33,9 +33,9 @@ steps: "managed", "--allow-unauthenticated", "--timeout", - "300", + "60", "--cpu", - "1", + "2", "--memory", "1Gi", "--update-env-vars", diff --git a/tarchia/api/v1/commit_management.py b/tarchia/api/v1/commit_management.py index bfce3d3..87f41e0 100644 --- a/tarchia/api/v1/commit_management.py +++ b/tarchia/api/v1/commit_management.py @@ -1,8 +1,5 @@ """ -Commits only for the main branch. -[GET] /repos/{owner}/{repo}/commits -[GET] /repos/{owner}/{repo}/commits/{commit_sha} """ import datetime diff --git a/tarchia/api/v1/data_management.py b/tarchia/api/v1/data_management.py index 82c8e4e..f223b43 100644 --- a/tarchia/api/v1/data_management.py +++ b/tarchia/api/v1/data_management.py @@ -1,3 +1,10 @@ +""" +Transaction control + +- additions and deletions +- schema changes +""" + import base64 import hashlib import time @@ -329,3 +336,32 @@ async def truncate_all_files(encoded_transaction: str): "message": "Table truncated in Transaction", "encoded_transaction": new_encoded_transaction, } + + +@router.patch("/tables/{owner}/{table}/schema") +async def update_schema( + schema: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), +): + from tarchia.metadata.schemas import validate_schema_update + from tarchia.utils.catalogs import identify_table + + # is the new schema valid + for col in schema.columns: + col.is_valid() + + catalog_entry = identify_table(owner=owner, table=table) + + # is the evolution valid + validate_schema_update(current_schema=catalog_entry.current_schema, updated_schema=schema) + + # update the schema + table_id = catalog_entry.table_id + catalog_entry.current_schema = schema + catalog_provider.update_table(table_id, catalog_entry) + + return { + "message": "Schema Updated", + "table": f"{owner}.{table}", + } \ No newline at end of file diff --git a/tarchia/api/v1/table_management.py b/tarchia/api/v1/table_management.py index be491f1..93a2995 100644 --- a/tarchia/api/v1/table_management.py +++ b/tarchia/api/v1/table_management.py @@ -247,35 +247,6 @@ async def delete_table( } -@router.patch("/tables/{owner}/{table}/schema") -async def update_schema( - schema: Request, - owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), - table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), -): - from tarchia.metadata.schemas import validate_schema_update - from tarchia.utils.catalogs import identify_table - - # is the new schema valid - for col in schema.columns: - col.is_valid() - - catalog_entry = identify_table(owner=owner, table=table) - - # is the evolution valid - validate_schema_update(current_schema=catalog_entry.current_schema, updated_schema=schema) - - # update the schema - table_id = catalog_entry.table_id - catalog_entry.current_schema = schema - catalog_provider.update_table(table_id, catalog_entry) - - return { - "message": "Schema Updated", - "table": f"{owner}.{table}", - } - - @router.patch("/tables/{owner}/{table}/metadata") async def update_metadata( metadata: UpdateMetadataRequest, diff --git a/tarchia/metadata/manifests/__init__.py b/tarchia/metadata/manifests/__init__.py index 1a8ec2f..0a91192 100644 --- a/tarchia/metadata/manifests/__init__.py +++ b/tarchia/metadata/manifests/__init__.py @@ -3,13 +3,13 @@ from typing import Tuple from tarchia.exceptions import DataError -from tarchia.manifests.pruning import prune +from tarchia.interfaces.storage import StorageProvider +from tarchia.interfaces.storage import storage_factory +from tarchia.metadata.manifests.pruning import prune from tarchia.models import Column from tarchia.models import Schema from tarchia.models.manifest_models import EntryType from tarchia.models.manifest_models import ManifestEntry -from tarchia.storage import StorageProvider -from tarchia.storage import storage_factory def get_manifest( diff --git a/tarchia/metadata/manifests/pruning.py b/tarchia/metadata/manifests/pruning.py index 152207d..639531a 100644 --- a/tarchia/metadata/manifests/pruning.py +++ b/tarchia/metadata/manifests/pruning.py @@ -3,10 +3,9 @@ from typing import Tuple from tarchia.models import Schema +from tarchia.models.manifest_models import ManifestEntry from tarchia.utils.to_int import to_int -from ..models.manifest_models import ManifestEntry - def parse_value(field: str, value: Any, schema: Schema) -> int: for column in schema.columns: diff --git a/tarchia/models/eventable.py b/tarchia/models/eventable.py index 5e74d61..fdebf96 100644 --- a/tarchia/models/eventable.py +++ b/tarchia/models/eventable.py @@ -25,9 +25,11 @@ import concurrent.futures from enum import Enum from typing import List +from typing import Union import orjson import requests +from orso.tools import counter from orso.tools import retry from pydantic import BaseModel from requests.exceptions import ConnectionError @@ -71,29 +73,50 @@ def _ensure_executor(cls): if cls._executor is None or cls._executor._shutdown: cls._executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - def subscribe(self, user: str, event: str, url: str): + def subscribe(self, user: str, event: Union[str, EventTypes], url: str): """Subscribe a URL to a specific event for a user.""" - if not event in self.EventTypes.__members__: - raise ValueError(f"Event '{event}' is not supported.") + if isinstance(event, str): + event = event.upper() # Ensure the event string is in lowercase + try: + event = self.EventTypes(event) + except ValueError: + raise ValueError(f"Event '{event}' is not supported.") + elif not isinstance(event, self.EventTypes): + raise TypeError(f"Event must be a string or an instance of {self.EventTypes}.") + if not is_valid_url(url): raise ValueError(f"URL does not appear to be valid") - subscription = Subscription(user=user, event=event, url=url) + subscription = Subscription(user=user, event=event.value, url=url) self.subscriptions.append(subscription) - def unsubscribe(self, user: str, event: str, url: str): + def unsubscribe(self, user: str, event: Union[str, EventTypes], url: str): """Unsubscribe a URL from a specific event for a user.""" + if isinstance(event, str): + event = event.upper() # Ensure the event string is in lowercase + try: + event = self.EventTypes(event) + except ValueError: + raise ValueError(f"Event '{event}' is not supported.") + self.subscriptions = [ s for s in self.subscriptions - if not (s.user == user and s.event == event and s.url == url) + if not (s.user == user and s.event == event.value and s.url == url) ] - def trigger_event(self, event: str, data: dict): + def trigger_event(self, event: Union[str, EventTypes], data: dict): """Trigger an event and notify all subscribers.""" - if event not in self.EventTypes: - raise ValueError(f"Event '{event}' is not supported.") + if isinstance(event, str): + event = event.upper() # Ensure the event string is in lowercase + try: + event = self.EventTypes(event) + except ValueError: + raise ValueError(f"Event '{event}' is not supported.") + elif not isinstance(event, self.EventTypes): + raise TypeError(f"Event must be a string or an instance of {self.EventTypes}.") + for subscription in self.subscriptions: - if subscription.event == event: + if subscription.event == event.value: self.notify_subscribers(subscription.url, data) def notify_subscribers(self, url: str, data: dict): diff --git a/tarchia/models/metadata_models.py b/tarchia/models/metadata_models.py index 707a832..7cc9097 100644 --- a/tarchia/models/metadata_models.py +++ b/tarchia/models/metadata_models.py @@ -121,7 +121,7 @@ class Schema(TarchiaBaseModel): columns: List[Column] -class TableCatalogEntry(TarchiaBaseModel, Eventable): +class TableCatalogEntry(Eventable, TarchiaBaseModel): """ The Catalog entry for a table. diff --git a/tests/components/test_eventable.py b/tests/components/test_eventable.py index 02463d9..0b9a369 100644 --- a/tests/components/test_eventable.py +++ b/tests/components/test_eventable.py @@ -2,6 +2,7 @@ import sys import os import pytest +import orjson from orso.types import OrsoTypes os.environ["CATALOG_NAME"] = "test_catalog.json" @@ -11,11 +12,11 @@ import pytest from unittest.mock import patch, call -from tarchia.models import TableCatalogEntry, Schema, Column +from tarchia.models import TableCatalogEntry, Schema, Column, OwnerEntry def test_subscribe(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') assert len(eventable.subscriptions) == 1 @@ -25,49 +26,47 @@ def test_subscribe(): def test_unsubscribe(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') eventable.unsubscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') assert len(eventable.subscriptions) == 0 def test_trigger_event(): + eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) - eventable.subscribe(user='user1', event='NEW_COMMIT', url='http://example.com/webhook_created') - - with patch('requests.post') as mock_post: - eventable.trigger_event('NEW_COMMIT', {'table_id': '123', 'name': 'ExampleTable'}) - assert mock_post.called - mock_post.assert_called_with('http://example.com/webhook_created', json={'table_id': '123', 'name': 'ExampleTable'}, timeout=10) + eventable.subscribe(user='user1', event=eventable.EventTypes.NEW_COMMIT, url='http://example.com/webhook_created') + eventable.trigger_event(eventable.EventTypes.NEW_COMMIT, {'table_id': '123', 'name': 'ExampleTable'}) + def test_trigger_event_not_supported(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) with pytest.raises(ValueError): eventable.trigger_event('unsupported_event', {'table_id': '123', 'name': 'ExampleTable'}) def test_subscribe_not_supported_event(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) with pytest.raises(ValueError): - eventable.subscribe(user='user1', event='unsupported_event', url='http://example.com/webhook') + eventable.subscribe(user='user1', event="TABLE_CREATED", url='http://example.com/webhook') def test_notify_subscribers(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) with patch.object(eventable, '_send_request_with_retries') as mock_send_request: eventable.notify_subscribers('http://example.com/webhook', {'table_id': '123', 'name': 'ExampleTable'}) eventable._executor.shutdown(wait=True) - mock_send_request.assert_called_once_with('http://example.com/webhook', {'table_id': '123', 'name': 'ExampleTable'}) + mock_send_request.assert_called_once_with('http://example.com/webhook', orjson.dumps({'table_id': '123', 'name': 'ExampleTable'})) def test_send_request_with_retries(): eventable = TableCatalogEntry( - name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]) + name="test", steward="test", owner="test", table_id="test", location="test", partitioning=[], last_updated_ms=0, permissions=[], visibility="PUBLIC", current_schema=Schema(columns=[]), freshness_life_in_days=0, retention_in_days=0 ) with patch('requests.post') as mock_post: mock_post.return_value.status_code = 200 @@ -76,5 +75,5 @@ def test_send_request_with_retries(): if __name__ == "__main__": # pragma: no cover from tests.tools import run_tests - + run_tests() \ No newline at end of file diff --git a/tests/components/test_manifest_building.py b/tests/components/test_manifest_building.py index ea905e3..a61eaf1 100644 --- a/tests/components/test_manifest_building.py +++ b/tests/components/test_manifest_building.py @@ -9,7 +9,7 @@ sys.path.insert(1, os.path.join(sys.path[0], "../..")) from tarchia.models import Schema, Column -from tarchia.manifests import build_manifest_entry +from tarchia.metadata.manifests import build_manifest_entry from tarchia.utils.to_int import to_int from tarchia.exceptions import DataError @@ -32,7 +32,6 @@ def test_build_basic_manifest(): basic = build_manifest_entry("testdata/planets/planets.parquet", SCHEMA).as_dict() assert basic["file_path"] == "testdata/planets/planets.parquet" - assert basic["file_format"] == "parquet" assert basic["record_count"] == 9 assert basic["sha256_checksum"] == "5a66d1e67f9b3749983da132d78e4744ee78b09b6549719c4d6359f573ac3baa" diff --git a/tests/components/test_manifest_pruning.py b/tests/components/test_manifest_pruning.py index ba80e04..82e6588 100644 --- a/tests/components/test_manifest_pruning.py +++ b/tests/components/test_manifest_pruning.py @@ -7,8 +7,8 @@ sys.path.insert(1, os.path.join(sys.path[0], "../..")) from tarchia.models import Schema, Column -from tarchia.manifests.pruning import parse_filters, prune -from tarchia.manifests import ManifestEntry +from tarchia.metadata.manifests.pruning import parse_filters, prune +from tarchia.metadata.manifests import ManifestEntry def test_basic_parsing(): diff --git a/tests/components/test_transaction_signing.py b/tests/components/test_transaction_signing.py index ea3f351..7e2a894 100644 --- a/tests/components/test_transaction_signing.py +++ b/tests/components/test_transaction_signing.py @@ -8,8 +8,8 @@ import pytest -from tarchia.v1.routes.data_management import encode_and_sign_transaction -from tarchia.v1.routes.data_management import verify_and_decode_transaction +from tarchia.api.v1.data_management import encode_and_sign_transaction +from tarchia.api.v1.data_management import verify_and_decode_transaction from tarchia.exceptions import TransactionError from tarchia.models import Transaction diff --git a/tests/endpoints/test_table_endpoints.py b/tests/endpoints/test_table_endpoints.py index cc9a4e7..4fb6543 100644 --- a/tests/endpoints/test_table_endpoints.py +++ b/tests/endpoints/test_table_endpoints.py @@ -131,61 +131,7 @@ def test_maintain_table_metadata(): assert response.status_code == 200, f"{response.status_code} - {response.content}" -def test_maintain_table_schema(): - assert False, "This isn't how we're going to manage schemas going forward" - - try: - os.remove(os.environ["CATALOG_NAME"]) - except FileNotFoundError: - pass - - ensure_owner() - - client = TestClient(application) - - new_table = CreateTableRequest( - name="test_dataset_schema_test", - location="gs://dataset/", - steward="bob", - table_schema=Schema(columns=[Column(name="column")]), - freshness_life_in_days=0, - retention_in_days=0 - ) - - # create the table - response = client.post(url=f"/v1/tables/{TEST_OWNER}", content=new_table.serialize()) - assert response.status_code == 200, f"{response.status_code} - {response.content}" - - # confirm we know the schema value before we start - response = client.get(url=f"/v1/tables/{TEST_OWNER}/test_dataset_schema_test") - assert response.status_code == 200, f"{response.status_code} - {response.content}" - current_schema = response.json()["current_schema"] - assert current_schema == {"columns": [{"name": "column", "aliases": [], "default": None, "description": "", "type": "VARCHAR"}]}, current_schema - - # update the schema - response = client.patch( - url=f"/v1/tables/{TEST_OWNER}/test_dataset_schema_test/schema", - content=Schema(columns=[Column(name="new", type="VARCHAR", default="true")]).serialize(), - ) - assert response.status_code == 200, f"{response.status_code} - {response.content}" - - # confirm the schema has been updated correctly - response = client.get(url=f"/v1/tables/{TEST_OWNER}/test_dataset_schema_test") - assert response.status_code == 200, f"{response.status_code} - {response.content}" - schema = response.json()["current_schema"] - assert schema is not None - assert response.json()["current_schema"] == { - "columns": [{"name": "new", "aliases": [], "default": "true", "description": "", "type": "VARCHAR"}] - }, schema - - # delete the table - response = client.delete(url=f"/v1/tables/{TEST_OWNER}/test_dataset_schema_test") - assert response.status_code == 200, f"{response.status_code} - {response.content}" - - if __name__ == "__main__": # pragma: no cover from tests.tools import run_tests - - test_create_read_update_delete_table() run_tests() diff --git a/tests/storage/test_local_storage.py b/tests/storage/test_local_storage.py index c13deb9..c31a7e8 100644 --- a/tests/storage/test_local_storage.py +++ b/tests/storage/test_local_storage.py @@ -8,7 +8,7 @@ sys.path.insert(1, os.path.join(sys.path[0], "../..")) -from tarchia.storage import storage_factory +from tarchia.interfaces.storage import storage_factory TEMP_FOLDER = "_temp" From 138e10c15270e889ec9e7f9fa48b5db6834e0278 Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 25 Jul 2024 20:34:47 +0000 Subject: [PATCH 10/16] Tarchia Version 0.0.0-alpha.127 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index 2932398..cbd205a 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 126 +__build__ = 127 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 36b2192c7382a8e4c03a03cd52e61ae227c92d31 Mon Sep 17 00:00:00 2001 From: joocer Date: Fri, 26 Jul 2024 00:37:24 +0100 Subject: [PATCH 11/16] branching --- tarchia/api/v1/branch_management.py | 94 ++++++++++++++++++++++- tarchia/api/v1/commit_management.py | 21 +++-- tarchia/api/v1/data_management.py | 93 +++++++++++----------- tarchia/api/v1/search.py | 11 +++ tarchia/api/v1/subscription_management.py | 10 +++ tarchia/api/v1/table_management.py | 38 ++++++++- tarchia/api/v1/view_management.py | 23 ++++++ tarchia/metadata/manifests/__init__.py | 5 +- tarchia/models/metadata_models.py | 2 + tarchia/models/request_models.py | 6 -- tarchia/utils/__init__.py | 6 ++ 11 files changed, 245 insertions(+), 64 deletions(-) create mode 100644 tarchia/api/v1/search.py create mode 100644 tarchia/api/v1/subscription_management.py create mode 100644 tarchia/api/v1/view_management.py diff --git a/tarchia/api/v1/branch_management.py b/tarchia/api/v1/branch_management.py index d6125f1..2dc6957 100644 --- a/tarchia/api/v1/branch_management.py +++ b/tarchia/api/v1/branch_management.py @@ -1,5 +1,93 @@ """ -[GET] /repos/{owner}/{repo}/branch -[GET] /repos/{owner}/{repo}/branch/{branch}/head -[POST] /repos/{owner}/{repo}/merges => {"base", "head", "message"} +[GET] /tables/{owner}/{table}/branches/{branch} (returns HEAD) +[POST] /tables/{owner}/{table}/branches +[DELETE]/tables/{owner}/{table}/branches/{branch} +[POST] /tables/{owner}/{table}/merge """ + +import orjson +from fastapi import APIRouter +from fastapi import Path +from fastapi import Query +from fastapi import Request +from fastapi.responses import ORJSONResponse + +from tarchia.exceptions import CommitNotFoundError +from tarchia.models import Schema +from tarchia.utils.constants import IDENTIFIER_REG_EX + +router = APIRouter() + + +@router.get("/tables/{owner}/{table}/branches/{branch}", response_class=ORJSONResponse) +async def get_branch( + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + branch: str = Path(description="The name of the branch.", pattern=IDENTIFIER_REG_EX), +): + # Your logic to retrieve branch information, including the HEAD commit + branch_info = { + "owner": owner, + "table": table, + "branch": branch, + "head_commit": "commit_sha_of_head", # Replace with actual HEAD commit SHA + "commits": ["commit1", "commit2", "commit3"], # Example commits + } + return branch_info + + +@router.post("/tables/{owner}/{table}/branches", response_class=ORJSONResponse) +async def create_branch( + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + branch: str = Query(description="The name of the new branch.", pattern=IDENTIFIER_REG_EX), + source_branch: str = Query( + description="The name of the source branch to create from.", pattern=IDENTIFIER_REG_EX + ), +): + # Your logic to create a new branch from the source branch + new_branch_info = { + "owner": owner, + "table": table, + "branch": branch, + "source_branch": source_branch, + "message": "Branch created successfully", + } + return new_branch_info + + +@router.delete("/tables/{owner}/{table}/branches/{branch}", response_class=ORJSONResponse) +async def delete_branch( + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + branch: str = Path(description="The name of the branch to delete.", pattern=IDENTIFIER_REG_EX), +): + # Your logic to delete a branch + delete_info = { + "owner": owner, + "table": table, + "branch": branch, + "message": "Branch deleted successfully", + } + return delete_info + + +@router.post("/tables/{owner}/{table}/merge", response_class=ORJSONResponse) +async def merge_branch( + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + base: str = Query(description="The branch to merge into.", pattern=IDENTIFIER_REG_EX), + head: str = Query(description="The branch to merge from.", pattern=IDENTIFIER_REG_EX), +): + # Your logic to perform the merge operation + # For now, we assume there are no merge conflicts + + # Example response, replace with actual merge logic + merge_info = { + "owner": owner, + "table": table, + "base_branch": base, + "head_branch": head, + "message": "Branches merged successfully", + } + return merge_info diff --git a/tarchia/api/v1/commit_management.py b/tarchia/api/v1/commit_management.py index 87f41e0..084a692 100644 --- a/tarchia/api/v1/commit_management.py +++ b/tarchia/api/v1/commit_management.py @@ -1,5 +1,7 @@ """ +[GET] /tables/{owner}/{table}/commits/{commit_sha} +[GET] /tables/{owner}/{table}/commits?branch,user,before,after,page_size """ import datetime @@ -36,9 +38,10 @@ async def get_table_commit( from tarchia.metadata.manifests import get_manifest from tarchia.metadata.manifests.pruning import parse_filters from tarchia.utils import build_root + from tarchia.utils import get_base_url from tarchia.utils.catalogs import identify_table - base_url = request.url.scheme + "://" + request.url.netloc + base_url = get_base_url(request=request) # read the data from the catalog for this table catalog_entry = identify_table(owner, table) @@ -84,17 +87,23 @@ async def get_list_of_table_commits( request: Request, owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), - before: datetime.datetime = Query(None, description="Filter commits"), - after: datetime.datetime = Query(None, description="Filter commits"), - page_size: int = Query(100, description="Maximum items to show"), + branch: str = Query( + default=MAIN_BRANCH, + description="The commit branch of the table.", + pattern=IDENTIFIER_REG_EX, + ), + user: str = Query(default=None, description="The committer.", pattern=IDENTIFIER_REG_EX), + before: datetime.datetime = Query(None, description="Filter commits before this date."), + after: datetime.datetime = Query(None, description="Filter commits after this date."), + page_size: int = Query(100, description="Maximum items to show."), ): from tarchia.interfaces.storage import storage_factory from tarchia.metadata.history import HistoryTree from tarchia.utils import build_root + from tarchia.utils import get_base_url from tarchia.utils.catalogs import identify_table - base_url = request.url.scheme + "://" + request.url.netloc - branch = MAIN_BRANCH + base_url = get_base_url(request=request) # read the data from the catalog for this table catalog_entry = identify_table(owner, table) diff --git a/tarchia/api/v1/data_management.py b/tarchia/api/v1/data_management.py index f223b43..b9b1189 100644 --- a/tarchia/api/v1/data_management.py +++ b/tarchia/api/v1/data_management.py @@ -1,28 +1,37 @@ """ Transaction control -- additions and deletions -- schema changes +@router.post("/transactions/start") -> [GET] /tables/{owner}/{table}/commits/{commit}/pull/start +@router.post("/transactions/commit") -> [POST] /pull/commit +@router.post("/transactions/stage") -> [POST] /pull/stage +@router.post("/transactions/truncate") -> [POST] /pull/truncate +@router.patch("/transaction/encryption")-> [PATCH] /pull/encryption + -> [POST] /pull/abort """ import base64 import hashlib import time from typing import List +from typing import Literal +from typing import Optional +from typing import Union import orjson from fastapi import APIRouter from fastapi import HTTPException +from fastapi import Path from fastapi import Request from tarchia.exceptions import TransactionError +from tarchia.models import Commit from tarchia.models import CommitRequest from tarchia.models import StageFilesRequest -from tarchia.models import TableRequest from tarchia.models import Transaction from tarchia.utils import config from tarchia.utils.constants import COMMITS_ROOT from tarchia.utils.constants import HISTORY_ROOT +from tarchia.utils.constants import IDENTIFIER_REG_EX from tarchia.utils.constants import MAIN_BRANCH from tarchia.utils.constants import MANIFEST_ROOT @@ -87,12 +96,12 @@ def verify_and_decode_transaction(transaction_data: str) -> Transaction: return Transaction(**transaction) -def load_old_commit(storage_provider, commit_root, parent_commit): +def load_old_commit(storage_provider, commit_root, parent_commit) -> Optional[Commit]: if parent_commit: commit_file = storage_provider.read_blob(f"{commit_root}/commit-{parent_commit}.json") if commit_file: - return orjson.loads(commit_file) - return {} + return Commit(**orjson.loads(commit_file)) + return None def build_new_manifest(old_manifest, transaction, schema): @@ -133,42 +142,49 @@ def xor_hex_strings(hex_strings: List[str]) -> str: return result_bytes.hex() -@router.post("/transactions/start") -async def start_transaction(table: TableRequest): +@router.post("/tables/{owner}/{table}/commits/{commit}/pull/start") +async def start_transaction( + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + commit: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), +): from tarchia.interfaces.storage import storage_factory from tarchia.utils import build_root from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_table - catalog_entry = identify_table(owner=table.owner, table=table.table) + catalog_entry = identify_table(owner=owner, table=table) table_id = catalog_entry.table_id - if table.commit_sha is None: - table.commit_sha = catalog_entry.current_commit_sha - else: - commit_root = build_root(COMMITS_ROOT, owner=table.owner, table_id=catalog_entry.table_id) - storage_provider = storage_factory() - commit_file = storage_provider.read_blob(f"{commit_root}/commit-{table.commit_sha}.json") - if not commit_file: - raise TransactionError("Snapshot not found") + if commit == "head": + commit = catalog_entry.current_commit_sha + + commit_root = build_root(COMMITS_ROOT, owner=owner, table_id=catalog_entry.table_id) + storage_provider = storage_factory() + parent_commit = load_old_commit(storage_provider, commit_root, commit) + + if parent_commit is None: + raise TransactionError("Commit not found") transaction_id = generate_uuid() transaction = Transaction( transaction_id=transaction_id, expires_at=int(time.time()), table_id=table_id, - table=table.table, - owner=table.owner, - parent_commit_sha=table.commit_sha, + table=table, + owner=owner, + parent_commit_sha=commit, additions=[], deletions=[], truncate=False, + encryption=parent_commit.encryption, + table_schema=parent_commit.table_schema, ) encoded_data = encode_and_sign_transaction(transaction) return {"message": "Transaction started", "encoded_transaction": encoded_data} -@router.post("/transactions/commit") +@router.post("/pull/commit") async def commit_transaction(request: Request, commit_request: CommitRequest): """ Commits a transaction by verifying it, updating the manifest and commit, @@ -294,7 +310,7 @@ async def commit_transaction(request: Request, commit_request: CommitRequest): raise HTTPException(status_code=500, detail=str(e)) from e -@router.post("/transactions/stage") +@router.post("/pull/stage") async def add_files_to_transaction(stage: StageFilesRequest): """ Add files to a table. @@ -312,7 +328,7 @@ async def add_files_to_transaction(stage: StageFilesRequest): return {"message": "Files added to transaction", "encoded_transaction": new_encoded_transaction} -@router.post("/transactions/truncate") +@router.post("/pull/truncate") async def truncate_all_files(encoded_transaction: str): """ Truncate (delete all records) from a table. @@ -338,30 +354,19 @@ async def truncate_all_files(encoded_transaction: str): } -@router.patch("/tables/{owner}/{table}/schema") -async def update_schema( +@router.patch("/pull/encryption") +async def update_encryption( schema: Request, owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), ): - from tarchia.metadata.schemas import validate_schema_update - from tarchia.utils.catalogs import identify_table - - # is the new schema valid - for col in schema.columns: - col.is_valid() + raise NotImplementedError("Create a commit") - catalog_entry = identify_table(owner=owner, table=table) - - # is the evolution valid - validate_schema_update(current_schema=catalog_entry.current_schema, updated_schema=schema) - # update the schema - table_id = catalog_entry.table_id - catalog_entry.current_schema = schema - catalog_provider.update_table(table_id, catalog_entry) - - return { - "message": "Schema Updated", - "table": f"{owner}.{table}", - } \ No newline at end of file +@router.patch("/pull/abort") +async def abort_pull( + schema: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), +): + raise NotImplementedError("Create a commit") diff --git a/tarchia/api/v1/search.py b/tarchia/api/v1/search.py new file mode 100644 index 0000000..34d4e04 --- /dev/null +++ b/tarchia/api/v1/search.py @@ -0,0 +1,11 @@ +from fastapi import APIRouter +from fastapi import Request +from fastapi.responses import ORJSONResponse +from typing import List +router = APIRouter() + + + +@router.get("/search", response_class=ORJSONResponse) +async def search(request: Request, term: str, scopes: List[str] = None): + raise NotImplementedError("Not Implemented") \ No newline at end of file diff --git a/tarchia/api/v1/subscription_management.py b/tarchia/api/v1/subscription_management.py new file mode 100644 index 0000000..1967bc3 --- /dev/null +++ b/tarchia/api/v1/subscription_management.py @@ -0,0 +1,10 @@ +""" +/tables/{owner}/{table}/hooks +/owners/{owner}/hooks + +/{hook_id}/ +/{hook_id}/ping + + +we only record the latest attempt +""" \ No newline at end of file diff --git a/tarchia/api/v1/table_management.py b/tarchia/api/v1/table_management.py index 93a2995..5da0f45 100644 --- a/tarchia/api/v1/table_management.py +++ b/tarchia/api/v1/table_management.py @@ -12,6 +12,7 @@ from tarchia.models import TableCatalogEntry from tarchia.models import UpdateMetadataRequest from tarchia.models import UpdateValueRequest +from tarchia.utils import get_base_url from tarchia.utils.config import METADATA_ROOT from tarchia.utils.constants import COMMITS_ROOT from tarchia.utils.constants import HISTORY_ROOT @@ -35,7 +36,7 @@ async def list_tables(owner: str, request: Request): List[Dict[str, Any]]: A list of tables with their metadata, including the commit URL if applicable. """ - base_url = request.url.scheme + "://" + request.url.netloc + base_url = get_base_url(request=request) table_list = [] @@ -89,7 +90,7 @@ async def create_table( from tarchia.utils import generate_uuid from tarchia.utils.catalogs import identify_owner - base_url = request.url.scheme + "://" + request.url.netloc + base_url = get_base_url(request=request) timestamp = int(time.time_ns() / 1e6) # check if we have a table with that name already @@ -107,7 +108,7 @@ async def create_table( # build the new commit record new_commit = Commit( - data_hash="", + data_hash="0" * 64, user="user", message="Initial commit", branch=MAIN_BRANCH, @@ -280,10 +281,39 @@ async def update_table( catalog_entry = identify_table(owner, table) setattr(catalog_entry, attribute, value.value) - catalog_provider.update_table(table_id=catalog_entry.table_id, entry=catalog_entry) return { "message": "Table updated", "table": f"{owner}.{table}", } + + +@router.patch("/tables/{owner}/{table}/branches/{branch}/schema") +async def update_schema( + schema: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), +): + raise NotImplementedError("Create a commit") + from tarchia.metadata.schemas import validate_schema_update + from tarchia.utils.catalogs import identify_table + + # is the new schema valid + for col in schema.columns: + col.is_valid() + + catalog_entry = identify_table(owner=owner, table=table) + + # is the evolution valid + validate_schema_update(current_schema=catalog_entry.current_schema, updated_schema=schema) + + # update the schema + table_id = catalog_entry.table_id + catalog_entry.current_schema = schema + catalog_provider.update_table(table_id, catalog_entry) + + return { + "message": "Schema Updated", + "table": f"{owner}.{table}", + } diff --git a/tarchia/api/v1/view_management.py b/tarchia/api/v1/view_management.py new file mode 100644 index 0000000..2e7d29a --- /dev/null +++ b/tarchia/api/v1/view_management.py @@ -0,0 +1,23 @@ +from fastapi import APIRouter +from fastapi import Request +from fastapi.responses import ORJSONResponse +from typing import List +router = APIRouter() + + + +@router.get("/views/{owner}", response_class=ORJSONResponse) +async def list_views(request: Request, owner:str): + raise NotImplementedError("Not Implemented") + +@router.post("/views/{owner}", response_class=ORJSONResponse) +async def create_view(request: Request, owner:str): + raise NotImplementedError("Not Implemented") + +@router.get("/views/{owner}/{view}", response_class=ORJSONResponse) +async def get_view(request: Request, owner:str, view:str): + raise NotImplementedError("Not Implemented") + +@router.delete("/views/{owner}/{view}", response_class=ORJSONResponse) +async def delete_view(request: Request, owner:str, view:str): + raise NotImplementedError("Not Implemented") \ No newline at end of file diff --git a/tarchia/metadata/manifests/__init__.py b/tarchia/metadata/manifests/__init__.py index 0a91192..93b7a01 100644 --- a/tarchia/metadata/manifests/__init__.py +++ b/tarchia/metadata/manifests/__init__.py @@ -13,7 +13,7 @@ def get_manifest( - location: str, + location: Optional[str], storage_provider: StorageProvider, filter_conditions: Optional[List[Tuple[str, str, int]]], ) -> List[ManifestEntry]: @@ -42,6 +42,9 @@ def get_manifest( manifest = [] + if location is None: + return manifest + # get the manifest manifest_bytes = storage_provider.read_blob(location) manifest_complete = fastavro.reader(BytesIO(manifest_bytes)) diff --git a/tarchia/models/metadata_models.py b/tarchia/models/metadata_models.py index 7cc9097..4c7917f 100644 --- a/tarchia/models/metadata_models.py +++ b/tarchia/models/metadata_models.py @@ -195,6 +195,8 @@ class Transaction(TarchiaBaseModel): table_id: str table: str owner: str + encryption: Optional[EncryptionDetails] + table_schema: Schema parent_commit_sha: Optional[str] = None additions: List[str] = Field(default_factory=list) deletions: List[str] = Field(default_factory=list) diff --git a/tarchia/models/request_models.py b/tarchia/models/request_models.py index bc551f7..64e567a 100644 --- a/tarchia/models/request_models.py +++ b/tarchia/models/request_models.py @@ -97,12 +97,6 @@ class UpdateValueRequest(TarchiaBaseModel): value: Any -class TableRequest(TarchiaBaseModel): - owner: str - table: str - commit_sha: Optional[str] = None - - class CommitRequest(TarchiaBaseModel): encoded_transaction: str commit_message: str diff --git a/tarchia/utils/__init__.py b/tarchia/utils/__init__.py index e3ccaf5..64fc49e 100644 --- a/tarchia/utils/__init__.py +++ b/tarchia/utils/__init__.py @@ -1,5 +1,7 @@ import uuid +from fastapi import Request + from tarchia.utils.config import METADATA_ROOT @@ -8,6 +10,10 @@ def generate_uuid() -> str: return str(uuid.uuid4()).replace("-", "")[-16:] +def get_base_url(request: Request) -> str: + return request.url.scheme + "://" + request.url.netloc + + def build_root(root: str, owner: str, table_id: str) -> str: """Wrap some repeat legwork""" value = root.replace("[metadata_root]", METADATA_ROOT) From 7c69d3f1fdde15a0a946abcc6d528da5350c7289 Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 25 Jul 2024 23:37:41 +0000 Subject: [PATCH 12/16] Tarchia Version 0.0.0-alpha.128 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index cbd205a..ab3547d 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 127 +__build__ = 128 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 65e140aa3b1c243b9dc8103f1b149b4a6b308c03 Mon Sep 17 00:00:00 2001 From: joocer Date: Fri, 26 Jul 2024 00:38:48 +0100 Subject: [PATCH 13/16] branching --- tarchia/api/v1/search.py | 7 ++++--- tarchia/api/v1/subscription_management.py | 4 ++-- tarchia/api/v1/view_management.py | 16 +++++++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tarchia/api/v1/search.py b/tarchia/api/v1/search.py index 34d4e04..90b1654 100644 --- a/tarchia/api/v1/search.py +++ b/tarchia/api/v1/search.py @@ -1,11 +1,12 @@ +from typing import List + from fastapi import APIRouter from fastapi import Request from fastapi.responses import ORJSONResponse -from typing import List -router = APIRouter() +router = APIRouter() @router.get("/search", response_class=ORJSONResponse) async def search(request: Request, term: str, scopes: List[str] = None): - raise NotImplementedError("Not Implemented") \ No newline at end of file + raise NotImplementedError("Not Implemented") diff --git a/tarchia/api/v1/subscription_management.py b/tarchia/api/v1/subscription_management.py index 1967bc3..013dfbc 100644 --- a/tarchia/api/v1/subscription_management.py +++ b/tarchia/api/v1/subscription_management.py @@ -1,5 +1,5 @@ """ -/tables/{owner}/{table}/hooks +/tables/{owner}/{table}/hooks /owners/{owner}/hooks /{hook_id}/ @@ -7,4 +7,4 @@ we only record the latest attempt -""" \ No newline at end of file +""" diff --git a/tarchia/api/v1/view_management.py b/tarchia/api/v1/view_management.py index 2e7d29a..b677230 100644 --- a/tarchia/api/v1/view_management.py +++ b/tarchia/api/v1/view_management.py @@ -1,23 +1,25 @@ from fastapi import APIRouter from fastapi import Request from fastapi.responses import ORJSONResponse -from typing import List -router = APIRouter() +router = APIRouter() @router.get("/views/{owner}", response_class=ORJSONResponse) -async def list_views(request: Request, owner:str): +async def list_views(request: Request, owner: str): raise NotImplementedError("Not Implemented") + @router.post("/views/{owner}", response_class=ORJSONResponse) -async def create_view(request: Request, owner:str): +async def create_view(request: Request, owner: str): raise NotImplementedError("Not Implemented") + @router.get("/views/{owner}/{view}", response_class=ORJSONResponse) -async def get_view(request: Request, owner:str, view:str): +async def get_view(request: Request, owner: str, view: str): raise NotImplementedError("Not Implemented") + @router.delete("/views/{owner}/{view}", response_class=ORJSONResponse) -async def delete_view(request: Request, owner:str, view:str): - raise NotImplementedError("Not Implemented") \ No newline at end of file +async def delete_view(request: Request, owner: str, view: str): + raise NotImplementedError("Not Implemented") From 1f09b85e2ec033bdd4266b7f87eb8b0bfb0c467d Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 25 Jul 2024 23:39:02 +0000 Subject: [PATCH 14/16] Tarchia Version 0.0.0-alpha.129 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index ab3547d..e3b393d 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 128 +__build__ = 129 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From d467f9a8dcf7b36ffe65e4685f8449679e45c3b0 Mon Sep 17 00:00:00 2001 From: joocer Date: Fri, 26 Jul 2024 00:45:17 +0100 Subject: [PATCH 15/16] branching --- tarchia/api/v1/__init__.py | 4 ++++ tarchia/api/v1/data_management.py | 12 ++++++------ tarchia/models/__init__.py | 1 - 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tarchia/api/v1/__init__.py b/tarchia/api/v1/__init__.py index 5a46c6e..de29123 100644 --- a/tarchia/api/v1/__init__.py +++ b/tarchia/api/v1/__init__.py @@ -1,12 +1,16 @@ from fastapi import APIRouter +from .branch_management import router as branch_router from .commit_management import router as commit_router from .data_management import router as data_router from .owner_management import router as owner_router +from .search import router as search_router from .table_management import router as table_router v1_router = APIRouter(prefix="/v1") +v1_router.include_router(branch_router, tags=["Branch Management"]) v1_router.include_router(commit_router, tags=["Commit Management"]) v1_router.include_router(data_router, tags=["Data Management"]) v1_router.include_router(owner_router, tags=["Owner Management"]) +v1_router.include_router(search_router, tags=["Search"]) v1_router.include_router(table_router, tags=["Table Management"]) diff --git a/tarchia/api/v1/data_management.py b/tarchia/api/v1/data_management.py index b9b1189..80d944a 100644 --- a/tarchia/api/v1/data_management.py +++ b/tarchia/api/v1/data_management.py @@ -142,11 +142,11 @@ def xor_hex_strings(hex_strings: List[str]) -> str: return result_bytes.hex() -@router.post("/tables/{owner}/{table}/commits/{commit}/pull/start") +@router.post("/tables/{owner}/{table}/commits/{commit_sha}/pull/start") async def start_transaction( owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), - commit: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), + commit_sha: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), ): from tarchia.interfaces.storage import storage_factory from tarchia.utils import build_root @@ -156,12 +156,12 @@ async def start_transaction( catalog_entry = identify_table(owner=owner, table=table) table_id = catalog_entry.table_id - if commit == "head": - commit = catalog_entry.current_commit_sha + if commit_sha == "head": + commit_sha = catalog_entry.current_commit_sha commit_root = build_root(COMMITS_ROOT, owner=owner, table_id=catalog_entry.table_id) storage_provider = storage_factory() - parent_commit = load_old_commit(storage_provider, commit_root, commit) + parent_commit = load_old_commit(storage_provider, commit_root, commit_sha) if parent_commit is None: raise TransactionError("Commit not found") @@ -173,7 +173,7 @@ async def start_transaction( table_id=table_id, table=table, owner=owner, - parent_commit_sha=commit, + parent_commit_sha=commit_sha, additions=[], deletions=[], truncate=False, diff --git a/tarchia/models/__init__.py b/tarchia/models/__init__.py index 6ef6ab1..07b599e 100644 --- a/tarchia/models/__init__.py +++ b/tarchia/models/__init__.py @@ -16,6 +16,5 @@ from .request_models import CreateOwnerRequest from .request_models import CreateTableRequest from .request_models import StageFilesRequest -from .request_models import TableRequest from .request_models import UpdateMetadataRequest from .request_models import UpdateValueRequest From 091220c4b0eafb10170bb23adcb7dafc9506b59f Mon Sep 17 00:00:00 2001 From: XB500 Date: Thu, 25 Jul 2024 23:45:32 +0000 Subject: [PATCH 16/16] Tarchia Version 0.0.0-alpha.130 --- tarchia/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tarchia/__version__.py b/tarchia/__version__.py index e3b393d..24b5e5a 100644 --- a/tarchia/__version__.py +++ b/tarchia/__version__.py @@ -1,4 +1,4 @@ -__build__ = 129 +__build__ = 130 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.