diff --git a/python/deeplake/__init__.py b/python/deeplake/__init__.py index a6b866f41e..2b64355712 100644 --- a/python/deeplake/__init__.py +++ b/python/deeplake/__init__.py @@ -15,7 +15,7 @@ def progress_bar(iterable, *args, **kwargs): from ._deeplake import * from deeplake.ingestion import from_coco -__version__ = "4.1.4" +__version__ = "4.1.5" __all__ = [ "__version__", @@ -77,6 +77,9 @@ def progress_bar(iterable, *args, **kwargs): "UnsupportedChunkCompression", "InvalidImageCompression", "InvalidSegmentMaskCompression", + "InvalidMedicalCompression", + "UnexpectedMedicalTypeInputData", + "UnexpectedInputDataForDicomColumn", "InvalidBinaryMaskCompression", "DtypeMismatch", "UnspecifiedDtype", @@ -202,7 +205,7 @@ def get_raw_columns(source): if not col.dtype.is_link and col.dtype.kind in { deeplake.types.TypeKind.Image, deeplake.types.TypeKind.SegmentMask, - deeplake.types.TypeKind.BinaryMask + deeplake.types.TypeKind.Medical, } ] diff --git a/python/deeplake/__init__.pyi b/python/deeplake/__init__.pyi index 41e3d8f4a1..22e7a4f506 100644 --- a/python/deeplake/__init__.pyi +++ b/python/deeplake/__init__.pyi @@ -67,6 +67,9 @@ __all__ = [ "UnsupportedChunkCompression", "InvalidImageCompression", "InvalidSegmentMaskCompression", + "InvalidMedicalCompression", + "UnexpectedMedicalTypeInputData", + "UnexpectedInputDataForDicomColumn", "InvalidBinaryMaskCompression", "DtypeMismatch", "UnspecifiedDtype", @@ -557,7 +560,7 @@ def query(query: str, token: str | None = None) -> DatasetView: ds.add_column("text", "text") ds.commit() ds = deeplake.create("mem://dataset") - ds.add_column("split", "text") + ds.add_column("train_split", "text") ds.add_column("confidence", "float32") ds.add_column("label", "text") ds.commit() @@ -604,7 +607,7 @@ def query(query: str, token: str | None = None) -> DatasetView: # Filter training data train = deeplake.query(''' SELECT * FROM "mem://dataset" - WHERE "split" = 'train' + WHERE "train_split" = 'train' AND confidence > 0.9 AND label IN ('cat', 'dog') ''') @@ -688,7 +691,7 @@ def query_async(query: str, token: str | None = None) -> Future: Non-blocking check: ```python future = deeplake.query_async( - "SELECT * FROM dataset WHERE \\"split\\" = 'train'" + "SELECT * FROM dataset WHERE train_split = 'train'" ) if future.is_completed(): @@ -1924,7 +1927,7 @@ class Dataset(DatasetView): + Parameters: embedding_size: int Size of the embeddings. diff --git a/python/deeplake/types.py b/python/deeplake/types.py index 0070fcf6c7..117feb1911 100644 --- a/python/deeplake/types.py +++ b/python/deeplake/types.py @@ -24,6 +24,7 @@ "Point", "QuantizationType", "SegmentMask", + "Medical", "Sequence", "Struct", "Text", diff --git a/python/deeplake/types.pyi b/python/deeplake/types.pyi index 436c13b6cc..18911d663e 100644 --- a/python/deeplake/types.pyi +++ b/python/deeplake/types.pyi @@ -26,6 +26,7 @@ __all__ = [ "Point", "QuantizationType", "SegmentMask", + "Medical", "Sequence", "Struct", "Text", @@ -47,6 +48,15 @@ class QuantizationType: Stores a binary quantized representation of the original embedding in the index rather than a full copy of the embedding. This slightly decreases accuracy of searches, while significantly improving query time. + + """ Binary: typing.ClassVar[QuantizationType] @@ -242,6 +252,7 @@ class TypeKind: Polygon: Polygon data type ClassLabel: Class label data type Point: Point data type + Medical: Medical data type Link: Link data type """ @@ -258,6 +269,7 @@ class TypeKind: SegmentMask: typing.ClassVar[TypeKind] Sequence: typing.ClassVar[TypeKind] Text: typing.ClassVar[TypeKind] + Medical: typing.ClassVar[TypeKind] __members__: typing.ClassVar[dict[str, TypeKind]] def __eq__(self, other: typing.Any) -> bool: ... @@ -310,6 +322,12 @@ def Array(dtype: DataType | str, dimensions: int | None, shape: list[int] | None Returns: DataType: A new array data type with the specified parameters. + + Examples: Create a three-dimensional array, where each dimension can have any number of elements: ```python @@ -330,6 +348,12 @@ def Bool() -> DataType: Returns: DataType: A new boolean data type. + + Examples: Create columns with boolean type: ```python @@ -356,6 +380,12 @@ def Text(index_type: str | TextIndexType | None = None) -> Type: Returns: Type: A new text data type. + + Examples: Create text columns with different configurations: ```python @@ -395,6 +425,12 @@ def Dict() -> Type: See Also: :func:`deeplake.types.Struct` for a type that supports defining allowed keys. + + Examples: Create and use a dictionary column: ```python @@ -428,6 +464,12 @@ def Embedding( See Also: :func:`deeplake.types.Array` for a multidimensional array. + + Examples: Create embedding columns: ```python @@ -444,6 +486,12 @@ def Float32() -> DataType: Returns: DataType: A new 32-bit float data type. + + Examples: Create a column with 32-bit float type: ```python @@ -459,6 +507,12 @@ def Float64() -> DataType: Returns: DataType: A new 64-bit float data type. + + Examples: Create a column with 64-bit float type: ```python @@ -474,6 +528,12 @@ def Int16() -> DataType: Returns: DataType: A new 16-bit integer data type. + + Examples: Create a column with 16-bit integer type: ```python @@ -489,6 +549,12 @@ def Int32() -> DataType: Returns: DataType: A new 32-bit integer data type. + + Examples: Create a column with 32-bit integer type: ```python @@ -504,6 +570,12 @@ def Int64() -> DataType: Returns: DataType: A new 64-bit integer data type. + + Examples: Create a column with 64-bit integer type: ```python @@ -519,6 +591,12 @@ def Int8() -> DataType: Returns: DataType: A new 8-bit integer data type. + + Examples: Create a column with 8-bit integer type: ```python @@ -542,6 +620,12 @@ def Sequence(nested_type: DataType | str | Type) -> Type: Returns: Type: A new sequence data type. + + Examples: Create a sequence of images: ```python @@ -569,6 +653,12 @@ def Image(dtype: DataType | str = "uint8", sample_compression: str = "png") -> T dtype: The data type of the array elements to return sample_compression: The on-disk compression/format of the image + + Examples: ```python ds.add_column("col1", types.Image) @@ -584,6 +674,12 @@ def Link(type: Type) -> Type: Parameters: type: The type of the linked data + + Examples: ```python ds.add_column("col1", types.Link(types.Image())) @@ -613,6 +709,12 @@ def BoundingBox( format: The bounding box format. Possible values: `ccwh`, `ltwh`, `ltrb`, `unknown` bbox_type: The pixel type. Possible values: `pixel`, `fractional` + + Examples: ```python ds.add_column("col1", types.BoundingBox()) @@ -633,6 +735,12 @@ def BinaryMask( sample_compression: How to compress each row's value. Possible values: lz4, null (default: null) chunk_compression: How to compress all the values stored in a single file. Possible values: lz4, null (default: null) + + Examples: ```python ds.add_column("col1", types.BinaryMask(sample_compression="lz4")) @@ -655,6 +763,12 @@ def SegmentMask( sample_compression: How to compress each row's value. Possible values: lz4, null (default: null) chunk_compression: How to compress all the values stored in a single file. Possible values: lz4, null (default: null) + + Examples: ```python ds.add_column("col1", types.SegmentMask(sample_compression="lz4")) @@ -663,6 +777,63 @@ def SegmentMask( """ ... +def Medical( + compression: str +) -> Type: + """ + Medical datatype for storing medical images. + + + + Parameters: + compression: How to compress each row's value. Possible values: dcm, nii, nii.gz + Examples: + ```python + ds.add_column("col1", types.Medical(compression="dcm")) + + with open("path/to/dicom/file.dcm", "rb") as f: + bytes_data = f.read() + ds.append([{"col1": bytes_data}]) + ``` + """ + ... + def Struct(fields: dict[str, DataType | str]) -> DataType: """ Defines a custom datatype with specified keys. @@ -672,6 +843,12 @@ def Struct(fields: dict[str, DataType | str]) -> DataType: Parameters: fields: A dict where the key is the name of the field, and the value is the datatype definition for it + + Examples: ```python ds.add_column("col1", types.Struct({ @@ -689,6 +866,12 @@ def UInt16() -> DataType: """ An unsigned 16-bit integer value + + Examples: ```python ds.add_column("col1", types.UInt16) @@ -700,6 +883,12 @@ def UInt32() -> DataType: """ An unsigned 32-bit integer value + + Examples: ```python ds.add_column("col1", types.UInt16) @@ -711,6 +900,12 @@ def UInt64() -> DataType: """ An unsigned 64-bit integer value + + Examples: ```python ds.add_column("col1", types.UInt64) @@ -722,6 +917,12 @@ def UInt8() -> DataType: """ An unsigned 8-bit integer value + + Examples: ```python ds.add_column("col1", types.UInt16)