Skip to content

Commit

Permalink
add orphan flag and support order_by
Browse files Browse the repository at this point in the history
  • Loading branch information
oeway committed Oct 29, 2024
1 parent 20eeba6 commit 9c5dd31
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 41 deletions.
36 changes: 26 additions & 10 deletions docs/artifact-manager.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ gallery_manifest = {
}

# Create the collection with read permission for everyone and create permission for all authenticated users
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r", "@": "r+"})
# We set orphan=True to create a collection without a parent
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r", "@": "r+"}, orphan=True)
print("Dataset Gallery created.")
```

Expand Down Expand Up @@ -126,7 +127,8 @@ async def main():
"collection": [],
}
# Create the collection with read permission for everyone and create permission for all authenticated users
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r+", "@": "r+"})
# We set orphan=True to create a collection without a parent
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r+", "@": "r+"}, orphan=True)
print("Dataset Gallery created.")

# Create a new dataset inside the Dataset Gallery
Expand Down Expand Up @@ -197,7 +199,7 @@ gallery_manifest = {
"collection": [],
}
# Create the collection with read permission for everyone and create permission for all authenticated users
await artifact_manager.create(prefix="collections/schema-dataset-gallery", manifest=gallery_manifest, permissions={"*": "r+", "@": "r+"})
await artifact_manager.create(prefix="collections/schema-dataset-gallery", manifest=gallery_manifest, permissions={"*": "r+", "@": "r+"}, orphan=True)
print("Schema-based Dataset Gallery created.")
```

Expand Down Expand Up @@ -226,7 +228,7 @@ print("Valid dataset committed.")

## API References

### `create(prefix: str, manifest: dict, permissions: dict=None, stage: bool = False) -> None`
### `create(prefix: str, manifest: dict, permissions: dict=None, stage: bool = False, orphan: bool = False) -> None`

Creates a new artifact or collection with the specified manifest. The artifact is staged until committed. For collections, the `collection` field should be an empty list.

Expand All @@ -236,11 +238,17 @@ Creates a new artifact or collection with the specified manifest. The artifact i
- `manifest`: The manifest of the new artifact. Ensure the manifest follows the required schema if applicable (e.g., for collections).
- `permissions`: Optional. A dictionary containing user permissions. For example `{"*": "r+"}` gives read and create access to everyone, `{"@": "rw+"}` allows all authenticated users to read/write/create, and `{"user_id_1": "r+"}` grants read and create permissions to a specific user. You can also set permissions for specific operations, such as `{"user_id_1": ["read", "create"]}`. See detailed explanation about permissions below.
- `stage`: Optional. A boolean flag to stage the artifact. Default is `False`.
- `orphan`: Optional. A boolean flag to create the artifact without a parent collection. Default is `False`. If `True`, the artifact is not associated with any parent collection and therefore does not inherit permissions from one. This is mainly used for creating top-level collections.

**Note 1: If you set `stage=True`, you must call `commit()` to finalize the artifact.**

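**Note 2: If you set `orphan=True`, the artifact will not be associated with any collection. A non-orphan artifact must have a parent collection; if the parent does not exist, `create()` fails with an error asking you to create the parent first or set `orphan=True`.**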

**Example:**

```python
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True)
# Assuming we have already created a dataset-gallery collection, otherwise create it first or set orphan=True
await artifact_manager.create(prefix="collections/dataset-gallery/example-dataset", manifest=dataset_manifest, stage=True, orphan=False)
```

### Permissions
Expand Down Expand Up @@ -351,18 +359,22 @@ await artifact_manager.commit(prefix="collections/dataset-gallery/example-datase

---

### `delete(prefix: str) -> None`
### `delete(prefix: str, delete_files: bool = False, recursive: bool = False) -> None`

Deletes an artifact and its manifest from the database. The associated files in S3 storage are removed only when `delete_files=True`.

**Parameters:**

- `prefix`: The path of the artifact, it can be a prefix relative to the current workspace (e.g., `"collections/dataset-gallery/example-dataset"`) or an absolute prefix with the workspace id (e.g., `"/my_workspace_id/collections/dataset-gallery/example-dataset"`).
- `delete_files`: Optional. A boolean flag to delete all files associated with the artifact. Default is `False`.
- `recursive`: Optional. A boolean flag to delete all child artifacts recursively. Default is `False`.

**Warning: Setting `delete_files=True` requires `recursive=True`. With both set, all child artifacts will be deleted, and all files associated with the artifact and its child artifacts will be permanently deleted from the S3 storage. This operation is irreversible.**

**Example:**

```python
await artifact_manager.delete(prefix="collections/dataset-gallery/example-dataset")
await artifact_manager.delete(prefix="collections/dataset-gallery/example-dataset", delete_files=True, recursive=True)
```

---
Expand Down Expand Up @@ -465,9 +477,9 @@ manifest = await artifact_manager.read(prefix="collections/dataset-gallery/examp

---

### `list(prefix: str, keywords: list = None, filters: dict = None, mode: str = "AND", page: int = 0, page_size: int = 100) -> list`
### `list(prefix: str, keywords: list = None, filters: dict = None, mode: str = "AND", page: int = 0, page_size: int = 100, order_by: str = None, stage: bool = False, silent: bool = False) -> list`

List or search for artifacts within a collection based on keywords or filters, supporting both `AND` and `OR` modes.
List or search for child artifacts within a collection based on keywords or filters, supporting both `AND` and `OR` modes.

**Parameters:**

Expand All @@ -477,6 +489,9 @@ List or search for artifacts within a collection based on keywords or filters, s
- `mode`: Either `"AND"` or `"OR"` to combine conditions. Default is `"AND"`.
- `page`: Optional. The page number for paginated results. Default is `0`.
- `page_size`: Optional. The number of items per page. Default is `100`.
- `order_by`: Optional. The field to order results by. Default is `None`, which orders by prefix in ascending order. The available fields are `view_count`, `download_count`, `last_modified`, `created_at`, and `prefix`. You can append `<` to the field name for ascending order or `>` for descending order; a field name given without a suffix is ordered in descending order. For example, `view_count<` will order by view count in ascending order, while `view_count` and `view_count>` will both order by view count in descending order.
- `stage`: Optional. If `True`, lists staged artifacts. Default is `False`.
- `silent`: Optional. If `True`, suppresses the view count increment. Default is `False`.

**Returns:** A list of matching artifacts with summary fields.

Expand Down Expand Up @@ -522,7 +537,8 @@ gallery_manifest = {
"collection": [],
}
# Create the collection with read permission for everyone and create permission for all authenticated users
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r", "@": "r+"})
# We set orphan=True to create a collection without a parent
await artifact_manager.create(prefix="collections/dataset-gallery", manifest=gallery_manifest, permissions={"*": "r", "@": "r+"}, orphan=True)

# Step 3: Add a dataset to the gallery
dataset_manifest = {
Expand Down
2 changes: 1 addition & 1 deletion hypha/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "0.20.38.post14"
"version": "0.20.38.post15"
}
114 changes: 86 additions & 28 deletions hypha/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import json
import copy
import traceback
from sqlalchemy import (
event,
Column,
Expand Down Expand Up @@ -163,7 +164,7 @@ async def get_artifact(
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(traceback.format_exc()))

self.store.set_artifact_manager(self)
self.store.register_public_service(self.get_artifact_service())
Expand Down Expand Up @@ -459,6 +460,7 @@ async def create(
manifest: dict,
overwrite=False,
stage=False,
orphan=False,
permissions: dict = None,
context: dict = None,
):
Expand Down Expand Up @@ -504,11 +506,25 @@ async def create(
parent_artifact = await self._get_artifact(
session, ws, parent_prefix
)
if parent_artifact and not parent_artifact.manifest:
raise ValueError(
f"Parent artifact under prefix '{parent_prefix}' must be committed before creating a child artifact."
)
if parent_artifact and permissions is None:
permissions = parent_artifact.permissions
else:
parent_artifact = None

if not orphan and not parent_artifact:
raise ValueError(
f"Parent artifact not found (prefix: {parent_prefix}) for non-orphan artifact, please create the parent artifact first or set orphan=True."
)

if parent_artifact and orphan:
raise ValueError(
f"Parent artifact found (prefix: {parent_prefix}) for orphan artifact, please set orphan=False."
)

existing_artifact = await self._get_artifact(session, ws, prefix)

if existing_artifact:
Expand Down Expand Up @@ -718,7 +734,9 @@ async def commit(self, prefix, context: dict):
finally:
await session.close()

async def delete(self, prefix, context: dict):
async def delete(
self, prefix, delete_files=False, recursive=False, context: dict = None
):
"""Delete an artifact from the database and S3."""
if context is None or "ws" not in context:
raise ValueError("Context must include 'ws' (workspace).")
Expand All @@ -728,10 +746,30 @@ async def delete(self, prefix, context: dict):
else:
ws = context["ws"]

if delete_files and not recursive:
raise ValueError("Delete files requires recursive=True.")

user_info = UserInfo.model_validate(context["user"])
artifact = await self._get_artifact_with_permission(
ws, user_info, prefix, "delete"
)

if recursive:
# Remove all child artifacts
children = await self.list_children(prefix, context=context)
for child in children:
await self.delete(
child["_prefix"], delete_files=delete_files, context=context
)

if delete_files:
# Remove all files in the artifact's S3 prefix
artifact_path = safe_join(ws, f"{prefix}") + "/"
async with self.s3_controller.create_client_async() as s3_client:
await remove_objects_async(
s3_client, self.workspace_bucket, artifact_path
)

session = await self._get_session()
try:
async with session.begin():
Expand All @@ -743,15 +781,6 @@ async def delete(self, prefix, context: dict):
finally:
await session.close()

# Remove files from S3
await self._delete_s3_files(ws, prefix)

async def _delete_s3_files(self, ws, prefix):
"""Helper method to delete files associated with an artifact in S3."""
artifact_path = safe_join(ws, f"{prefix}") + "/"
async with self.s3_controller.create_client_async() as s3_client:
await remove_objects_async(s3_client, self.workspace_bucket, artifact_path)

async def list_files(
self,
prefix: str,
Expand Down Expand Up @@ -788,17 +817,13 @@ async def list_children(
mode="AND",
page: int = 0,
page_size: int = 100,
order_by=None,
stage=False,
silent=False,
context: dict = None,
):
"""
List artifacts within a collection under a specific prefix.
Supports:
- `keywords`: list of fuzzy search terms across all manifest fields.
- `filters`: dictionary of exact or fuzzy match for specific fields.
- `mode`: either 'AND' or 'OR' to combine conditions.
- `page`: the page number (0-indexed) for pagination.
- `page_size`: the number of results per page.
"""
if context is None or "ws" not in context:
raise ValueError("Context must include 'ws' (workspace).")
Expand All @@ -824,6 +849,12 @@ async def list_children(
ArtifactModel.workspace == ws,
ArtifactModel.prefix.like(f"{prefix}/%"),
)

if stage:
base_query = base_query.filter(ArtifactModel.stage_manifest != None)
else:
base_query = base_query.filter(ArtifactModel.manifest != None)

# Handle keyword-based search (fuzzy search across all manifest fields)
conditions = []
if keywords:
Expand Down Expand Up @@ -867,12 +898,37 @@ async def list_children(
query = base_query

offset = page * page_size
query = (
query.order_by(ArtifactModel.last_modified.desc())
.limit(page_size)
.offset(offset)
)
if order_by is None:
query = query.order_by(ArtifactModel.prefix.asc())
elif order_by.startswith("view_count"):
if order_by.endswith("<"):
query = query.order_by(ArtifactModel.view_count.asc())
else:
query = query.order_by(ArtifactModel.view_count.desc())
elif order_by.startswith("download_count"):
if order_by.endswith("<"):
query = query.order_by(ArtifactModel.download_count.asc())
else:
query = query.order_by(ArtifactModel.download_count.desc())
elif order_by.startswith("last_modified"):
if order_by.endswith("<"):
query = query.order_by(ArtifactModel.last_modified.asc())
else:
query = query.order_by(ArtifactModel.last_modified.desc())
elif order_by.startswith("created_at"):
if order_by.endswith("<"):
query = query.order_by(ArtifactModel.created_at.asc())
else:
query = query.order_by(ArtifactModel.created_at.desc())
elif order_by.startswith("prefix"):
if order_by.endswith("<"):
query = query.order_by(ArtifactModel.prefix.asc())
else:
query = query.order_by(ArtifactModel.prefix.desc())
else:
raise ValueError(f"Invalid order_by field: {order_by}")

query = query.limit(page_size).offset(offset)
# Execute the query
result = await session.execute(query)
artifacts = result.scalars().all()
Expand All @@ -886,20 +942,22 @@ async def list_children(
summary_fields = DEFAULT_SUMMARY_FIELDS
results = []
for artifact in artifacts:
if not artifact.manifest:
manifest = artifact.stage_manifest if stage else artifact.manifest
if not manifest:
continue
summary = {"_prefix": f"/{ws}/{artifact.prefix}"}
for field in summary_fields:
summary[field] = artifact.manifest.get(field)
summary[field] = manifest.get(field)

if "_metadata" in summary_fields:
summary["_metadata"] = self._generate_metadata(artifact)
results.append(summary)

# Increment the view count for the parent artifact
await self._read_manifest(
parent_artifact, stage=False, increment_view_count=not silent
)
if not stage:
# Increment the view count for the parent artifact
await self._read_manifest(
parent_artifact, stage=False, increment_view_count=not silent
)

return results
except Exception as e:
Expand Down
Loading

0 comments on commit 9c5dd31

Please sign in to comment.