Merge pull request #95 from mabel-dev/branching

Branching
mabel-dev · Jul 27, 2024 · 23d048f · 23d048f
2 parents e5b09f4 + 091220c
commit 23d048f
Show file tree

Hide file tree

Showing 55 changed files with 1,529 additions and 324 deletions.
diff --git a/README.md b/README.md
@@ -1,5 +1,25 @@
 # Tarchia
 
+Data As Code.
+
+- Data Changes as Commits
+- Branching for Development
+- Automated Testing
+- Merging and Deployment
+
+
+
+<!---
+- Schema changes in transaction commits
+- Multi-branch
+- Data should have a stale timeframe and a purge timeframe
+- Native Masking capability
+- Native Sampling capability
+- Create/Maintain Triggers
+- Expectations Checks on Commit
+- Secrets/Protected Data Checks on Commit
+--->
+
 Tarchia is an Active Data Catalog.
 
 Tarchia actively manages and catalogs data in real time. Unlike traditional catalogs that serve merely as passive records, our Active Data Catalog is essential to the operational workflow, ensuring metadata is always up to date and readily accessible for system processes.
@@ -47,11 +67,12 @@ table/
 ~~~mermaid
 flowchart TD
     CATALOG  --> COMMITS(Commit History)
+    CATALOG  --> PERMS(Permissions)
     CATALOG[(Catalog)] --> |Current| COMMIT(Commit)
-    CATALOG  --> |Current| SCHEMA(Schema)
     subgraph  
         COMMITS  -..-> |Historical| COMMIT
-        COMMIT --> SCHEMA
+        COMMIT --> SCHEMA(Schema)
+        COMMIT --> ENCRYPTION(Encryption)
         COMMIT --> MAN_LIST(Manifest/List)
     end
     MAN_LIST --> DATA(Data Files)

diff --git a/cloudbuild.yaml b/cloudbuild.yaml
@@ -33,9 +33,9 @@ steps:
         "managed",
         "--allow-unauthenticated",
         "--timeout",
-        "300",
+        "60",
         "--cpu",
-        "1",
+        "2",
         "--memory",
         "1Gi",
         "--update-env-vars",

diff --git a/main.py b/main.py
@@ -37,13 +37,13 @@
 from uvicorn import run
 
 from tarchia import __version__
-from tarchia.middlewares import AuditMiddleware
-from tarchia.middlewares import AuthorizationMiddleware
-from tarchia.v1 import routes as v1_routes
+from tarchia.api.middlewares import AuditMiddleware
+from tarchia.api.middlewares import AuthorizationMiddleware
+from tarchia.api.v1 import v1_router
 
 application = FastAPI(title="Tarchia Metastore", version=__version__)
 
-application.include_router(v1_routes.v1_router)
+application.include_router(v1_router)
 application.add_middleware(AuthorizationMiddleware)
 application.add_middleware(AuditMiddleware)
 

diff --git a/tarchia/__version__.py b/tarchia/__version__.py
@@ -1,4 +1,4 @@
-__build__ = 122
+__build__ = 130
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

diff --git a/tarchia/compaction/__init__.py → tarchia/actions/compaction/__init__.py b/tarchia/compaction/__init__.py → tarchia/actions/compaction/__init__.py
diff --git a/tarchia/actions/scanners/expectations/evaluate.py b/tarchia/actions/scanners/expectations/evaluate.py
@@ -0,0 +1,72 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+from data_expectations import Expectations
+from data_expectations.errors import ExpectationNotMetError
+from data_expectations.errors import ExpectationNotUnderstoodError
+
+# Result of Expectations.all_expectations(), computed once when this module is
+# imported; evaluate_record looks expectations up in it by name and calls them.
+ALL_EXPECTATIONS = Expectations.all_expectations()
+
+
+def evaluate_record(
+    expectations: Expectations, record: dict, suppress_errors: bool = False
+) -> bool:
+    """
+    Check one record against every expectation held by an Expectations instance.
+
+    Definitions are applied in the order they appear in
+    `expectations.set_of_expectations`; evaluation stops at the first one that
+    is not met.
+
+    Args:
+        expectations: The Expectations instance holding the definitions to apply.
+        record: The dictionary record to be tested.
+        suppress_errors: When True, an unmet expectation yields False instead of
+            raising ExpectationNotMetError.
+
+    Returns:
+        True if every expectation is met, False if one is not met and
+        suppress_errors is True.
+
+    Raises:
+        ExpectationNotUnderstoodError: If a definition names an expectation that
+            is not in ALL_EXPECTATIONS (raised regardless of suppress_errors).
+        ExpectationNotMetError: If an expectation is not met and suppress_errors
+            is False.
+    """
+    for definition in expectations.set_of_expectations:
+        name = definition.expectation
+
+        # unknown expectation names are always an error, even when suppressing
+        if name not in ALL_EXPECTATIONS:
+            raise ExpectationNotUnderstoodError(expectation=name)
+
+        # the definition's own config is spread last, so its keys win on a clash
+        arguments = {"row": record, "column": definition.column, **definition.config}
+        check = ALL_EXPECTATIONS[name]
+
+        if check(**arguments):
+            continue
+
+        # data failed to meet expectation
+        if suppress_errors:
+            return False
+        raise ExpectationNotMetError(name, record)
+
+    return True
+
+
+def evaluate_list(
+    expectations: Expectations, dictset: typing.Iterable[dict], suppress_errors: bool = False
+) -> bool:
+    """
+    Check every record in an iterable against a set of Expectations.
+
+    Records are consumed in order and iteration stops at the first record that
+    fails, so later records are not evaluated. Any exception raised by
+    evaluate_record propagates to the caller unchanged.
+
+    Args:
+        expectations: The Expectations instance.
+        dictset: The iterable of dictionary records to be tested.
+        suppress_errors: Passed through to evaluate_record for each record.
+
+    Returns:
+        True if every record meets every expectation (including when dictset
+        yields no records), False otherwise.
+    """
+    for record in dictset:
+        if not evaluate_record(expectations, record, suppress_errors):
+            return False
+    return True