From a71074f36c13daad9f51d96bf0c5a1c67929ac55 Mon Sep 17 00:00:00 2001 From: ntkathole Date: Tue, 25 Feb 2025 17:14:00 +0530 Subject: [PATCH] feat: Added docling and pytorch as add on Signed-off-by: ntkathole --- README.md | 4 +- .../architecture/push-vs-pull-model.md | 2 +- .../architecture/write-patterns.md | 2 +- pyproject.toml | 4 +- .../requirements/py3.10-ci-requirements.txt | 199 +++++++++++++++++- .../requirements/py3.11-ci-requirements.txt | 199 +++++++++++++++++- .../requirements/py3.9-ci-requirements.txt | 199 +++++++++++++++++- setup.py | 11 + 8 files changed, 600 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 306b135076e..1052a93c11e 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ The list below contains the functionality that contributors are planning to deve * [x] Python Client * [x] [Python feature server](https://docs.feast.dev/reference/feature-servers/python-feature-server) * [x] [Java feature server (alpha)](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) - * [x] [Go feature server (alpha)](https://github.com/feast-dev/feast/blob/master/go/README.md) + * [x] [Go feature server (alpha)](https://docs.feast.dev/reference/feature-servers/go-feature-server) * **Data Quality Management (See [RFC](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98/edit))** * [x] Data profiling and validation (Great Expectations) * **Feature Discovery and Governance** @@ -249,4 +249,4 @@ Thanks goes to these incredible people: - + \ No newline at end of file diff --git a/docs/getting-started/architecture/push-vs-pull-model.md b/docs/getting-started/architecture/push-vs-pull-model.md index b205e97fc51..4270517c831 100644 --- a/docs/getting-started/architecture/push-vs-pull-model.md +++ b/docs/getting-started/architecture/push-vs-pull-model.md @@ -25,4 +25,4 @@ Implicit in the Push model are decisions about _how_ and _when_ to push feature From a developer's perspective, there are three ways to push feature values to the online store with different tradeoffs. -They are discussed further in the [Write Patterns](getting-started/architecture/write-patterns.md) section. +They are discussed further in the [Write Patterns](./write-patterns.md) section. diff --git a/docs/getting-started/architecture/write-patterns.md b/docs/getting-started/architecture/write-patterns.md index 4674b5504d3..9379bbeaa53 100644 --- a/docs/getting-started/architecture/write-patterns.md +++ b/docs/getting-started/architecture/write-patterns.md @@ -1,6 +1,6 @@ # Writing Data to Feast -Feast uses a [Push Model](getting-started/architecture/push-vs-pull-model.md) to push features to the online store. +Feast uses a [Push Model](./push-vs-pull-model.md) to push features to the online store. This has two important consequences: (1) communication patterns between the Data Producer (i.e., the client) and Feast (i.e,. the server) and (2) feature computation and _feature value_ write patterns to Feast's online store. diff --git a/pyproject.toml b/pyproject.toml index 37a326b9642..2cbd0571082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ azure = [ cassandra = ["cassandra-driver>=3.24.0,<4"] couchbase = ["couchbase==4.3.2"] delta = ["deltalake"] +docling = ["docling>=2.23.0"] duckdb = ["ibis-framework[duckdb]>=9.0.0,<10"] elasticsearch = ["elasticsearch>=8.13.0"] faiss = ["faiss-cpu>=1.7.0,<2"] @@ -94,6 +95,7 @@ opentelemetry = ["prometheus_client", "psutil"] spark = ["pyspark>=3.0.0,<4"] trino = ["trino>=0.305.0,<0.400.0", "regex"] postgres = ["psycopg[binary,pool]>=3.0.0,<4"] +pytorch = ["torch>=2.2.2", "torchvision>=0.17.2"] qdrant = ["qdrant-client>=1.12.0"] redis = [ "redis>=4.2.2,<5", @@ -148,7 +150,7 @@ ci = [ "types-setuptools", "types-tabulate", "virtualenv<20.24.2", - "feast[aws, azure, cassandra, couchbase, delta, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, k8s, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, qdrant, redis, singlestore, snowflake, sqlite_vec]" + "feast[aws, azure, cassandra, couchbase, delta, docling, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, k8s, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, pytorch, qdrant, redis, singlestore, snowflake, sqlite_vec]" ] dev = ["feast[ci]"] docs = ["feast[ci]"] diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index ea29f119b0a..f6462bde696 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -51,6 +51,7 @@ atpublic==5.1 attrs==25.1.0 # via # aiohttp + # jsonlines # jsonschema # referencing azure-core==1.32.0 @@ -66,7 +67,9 @@ babel==2.17.0 # jupyterlab-server # sphinx beautifulsoup4==4.13.3 - # via nbconvert + # via + # docling + # nbconvert bigtree==0.25.0 # via feast (setup.py) bleach[css]==6.2.0 @@ -93,6 +96,7 @@ cassandra-driver==3.29.2 # via feast (setup.py) certifi==2025.1.31 # via + # docling # elastic-transport # httpcore # httpx @@ -120,6 +124,7 @@ click==8.1.8 # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.1.1 # via dask @@ -166,15 +171,30 @@ deltalake==0.25.1 deprecation==2.1.0 # via python-keycloak dill==0.3.9 - # via feast (setup.py) + # via + # feast (setup.py) + # multiprocess distlib==0.3.9 # via virtualenv docker==7.1.0 # via testcontainers +docling==2.24.0 + # via feast (setup.py) +docling-core[chunking]==2.20.0 + # via + # docling + # docling-ibm-models + # docling-parse +docling-ibm-models==3.4.0 + # via docling +docling-parse==3.4.0 + # via docling docutils==0.19 # via sphinx duckdb==1.1.3 # via ibis-framework +easyocr==1.7.2 + # via docling elastic-transport==8.17.0 # via elasticsearch elasticsearch==8.17.1 @@ -183,6 +203,8 @@ entrypoints==0.4 # via altair environs==9.5.0 # via pymilvus +et-xmlfile==2.0.0 + # via openpyxl exceptiongroup==1.2.2 # via # anyio @@ -200,8 +222,13 @@ fastjsonschema==2.21.1 # via nbformat filelock==3.17.0 # via + # huggingface-hub # snowflake-connector-python + # torch + # transformers # virtualenv +filetype==1.2.0 + # via docling fqdn==1.5.1 # via jsonschema frozenlist==1.5.0 @@ -212,6 +239,8 @@ fsspec==2024.9.0 # via # feast (setup.py) # dask + # huggingface-hub + # torch geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.24.1 @@ -325,6 +354,12 @@ httpx[http2]==0.27.2 # jupyterlab # python-keycloak # qdrant-client +huggingface-hub==0.29.1 + # via + # docling + # docling-ibm-models + # tokenizers + # transformers hyperframe==6.1.0 # via h2 ibis-framework[duckdb, mssql]==9.5.0 @@ -345,6 +380,8 @@ idna==3.10 # yarl ikvpy==0.0.36 # via feast (setup.py) +imageio==2.37.0 + # via scikit-image imagesize==1.4.1 # via sphinx importlib-metadata==8.6.1 @@ -379,6 +416,7 @@ jinja2==3.1.5 # moto # nbconvert # sphinx + # torch jmespath==1.0.1 # via # aiobotocore @@ -386,16 +424,21 @@ jmespath==1.0.1 # botocore json5==0.10.0 # via jupyterlab-server +jsonlines==3.1.0 + # via docling-ibm-models jsonpatch==1.33 # via great-expectations jsonpointer==3.0.0 # via # jsonpatch # jsonschema +jsonref==1.1.0 + # via docling-core jsonschema[format-nongpl]==4.23.0 # via # feast (setup.py) # altair + # docling-core # great-expectations # jupyter-events # jupyterlab-server @@ -443,14 +486,25 @@ jwcrypto==1.5.6 # via python-keycloak kubernetes==20.13.0 # via feast (setup.py) +latex2mathml==3.77.0 + # via docling-core +lazy-loader==0.4 + # via scikit-image locket==1.0.0 # via partd +lxml==5.3.1 + # via + # docling + # python-docx + # python-pptx lz4==4.4.3 # via trino makefun==1.15.6 # via great-expectations markdown-it-py==3.0.0 # via rich +marko==2.1.2 + # via docling markupsafe==3.0.2 # via # jinja2 @@ -480,6 +534,10 @@ mock==2.0.0 # via feast (setup.py) moto==4.2.14 # via feast (setup.py) +mpire[dill]==2.10.2 + # via semchunk +mpmath==1.3.0 + # via sympy msal==1.31.1 # via # azure-identity @@ -491,6 +549,8 @@ multidict==6.1.0 # aiobotocore # aiohttp # yarl +multiprocess==0.70.17 + # via mpire mypy==1.11.2 # via # feast (setup.py) @@ -511,6 +571,12 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel +networkx==3.4.2 + # via + # scikit-image + # torch +ninja==1.11.1.3 + # via easyocr nodeenv==1.9.1 # via pre-commit notebook==7.3.2 @@ -525,15 +591,31 @@ numpy==1.26.4 # altair # dask # db-dtypes + # docling-ibm-models + # easyocr # faiss-cpu # great-expectations # ibis-framework + # imageio + # opencv-python-headless # pandas # pyarrow # qdrant-client + # safetensors + # scikit-image # scipy + # shapely + # tifffile + # torchvision + # transformers oauthlib==3.2.2 # via requests-oauthlib +opencv-python-headless==4.11.0.86 + # via + # docling-ibm-models + # easyocr +openpyxl==3.1.5 + # via docling overrides==7.7.0 # via jupyter-server packaging==24.2 @@ -546,6 +628,7 @@ packaging==24.2 # google-cloud-bigquery # great-expectations # gunicorn + # huggingface-hub # ibis-framework # ibis-substrait # ipykernel @@ -553,17 +636,22 @@ packaging==24.2 # jupyter-server # jupyterlab # jupyterlab-server + # lazy-loader # marshmallow # nbconvert # pytest + # scikit-image # snowflake-connector-python # sphinx + # transformers pandas==2.2.3 # via # feast (setup.py) # altair # dask # db-dtypes + # docling + # docling-core # google-cloud-bigquery # great-expectations # ibis-framework @@ -583,6 +671,17 @@ pbr==6.1.1 # via mock pexpect==4.9.0 # via ipython +pillow==11.1.0 + # via + # docling + # docling-core + # docling-ibm-models + # docling-parse + # easyocr + # imageio + # python-pptx + # scikit-image + # torchvision pip==25.0.1 # via pip-tools pip-tools==7.4.1 @@ -678,6 +777,8 @@ pyasn1-modules==0.4.1 # via google-auth pybindgen==0.22.1 # via feast (setup.py) +pyclipper==1.3.0.post6 + # via easyocr pycparser==2.22 # via cffi pycryptodome==3.21.0 @@ -685,15 +786,23 @@ pycryptodome==3.21.0 pydantic==2.10.6 # via # feast (setup.py) + # docling + # docling-core + # docling-ibm-models + # docling-parse # fastapi # great-expectations + # pydantic-settings # qdrant-client pydantic-core==2.27.2 # via pydantic +pydantic-settings==2.8.0 + # via docling pygments==2.19.1 # via # feast (setup.py) # ipython + # mpire # nbconvert # rich # sphinx @@ -717,6 +826,8 @@ pyopenssl==24.3.0 # via snowflake-connector-python pyparsing==3.2.1 # via great-expectations +pypdfium2==4.30.1 + # via docling pyproject-hooks==1.2.0 # via # build @@ -753,6 +864,8 @@ pytest-timeout==1.4.2 # via feast (setup.py) pytest-xdist==3.6.1 # via feast (setup.py) +python-bidi==0.6.6 + # via easyocr python-dateutil==2.9.0.post0 # via # aiobotocore @@ -766,14 +879,19 @@ python-dateutil==2.9.0.post0 # moto # pandas # trino +python-docx==1.1.2 + # via docling python-dotenv==1.0.1 # via # environs + # pydantic-settings # uvicorn python-json-logger==3.2.1 # via jupyter-events python-keycloak==4.2.2 # via feast (setup.py) +python-pptx==1.0.2 + # via docling pytz==2025.1 # via # great-expectations @@ -785,11 +903,15 @@ pyyaml==6.0.2 # via # feast (setup.py) # dask + # docling-core + # easyocr + # huggingface-hub # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses + # transformers # uvicorn pyzmq==26.2.1 # via @@ -809,15 +931,18 @@ regex==2024.11.6 # via # feast (setup.py) # parsimonious + # transformers requests==2.32.3 # via # feast (setup.py) # azure-core # docker + # docling # google-api-core # google-cloud-bigquery # google-cloud-storage # great-expectations + # huggingface-hub # jupyterlab-server # kubernetes # moto @@ -829,6 +954,7 @@ requests==2.32.3 # singlestoredb # snowflake-connector-python # sphinx + # transformers # trino requests-oauthlib==2.0.0 # via kubernetes @@ -845,13 +971,17 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.9.4 - # via ibis-framework + # via + # ibis-framework + # typer rpds-py==0.23.1 # via # jsonschema # referencing rsa==4.9 # via google-auth +rtree==1.3.0 + # via docling ruamel-yaml==0.17.40 # via great-expectations ruamel-yaml-clib==0.2.12 @@ -860,8 +990,20 @@ ruff==0.9.7 # via feast (setup.py) s3transfer==0.11.2 # via boto3 +safetensors[torch]==0.5.2 + # via + # docling-ibm-models + # transformers +scikit-image==0.25.2 + # via easyocr scipy==1.15.2 - # via great-expectations + # via + # docling + # easyocr + # great-expectations + # scikit-image +semchunk==2.2.2 + # via docling-core send2trash==1.8.3 # via jupyter-server setuptools==75.8.0 @@ -873,6 +1015,10 @@ setuptools==75.8.0 # pip-tools # pymilvus # singlestoredb +shapely==2.0.7 + # via easyocr +shellingham==1.5.4 + # via typer singlestoredb==1.7.2 # via feast (setup.py) six==1.17.0 @@ -925,8 +1071,13 @@ starlette==0.45.3 # via fastapi substrait==0.23.0 # via ibis-substrait +sympy==1.13.3 + # via torch tabulate==0.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # docling-core + # docling-parse tenacity==8.5.0 # via feast (setup.py) terminado==0.18.1 @@ -937,8 +1088,12 @@ testcontainers==4.8.2 # via feast (setup.py) thriftpy2==0.5.2 # via happybase +tifffile==2025.2.18 + # via scikit-image tinycss2==1.4.0 # via bleach +tokenizers==0.19.1 + # via transformers toml==0.10.2 # via feast (setup.py) tomli==2.2.1 @@ -959,6 +1114,18 @@ toolz==0.12.1 # dask # ibis-framework # partd +torch==2.2.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr + # safetensors + # torchvision +torchvision==0.17.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr tornado==6.4.2 # via # ipykernel @@ -970,8 +1137,14 @@ tornado==6.4.2 tqdm==4.67.1 # via # feast (setup.py) + # docling + # docling-ibm-models # great-expectations + # huggingface-hub # milvus-lite + # mpire + # semchunk + # transformers traitlets==5.14.3 # via # comm @@ -987,10 +1160,18 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat +transformers==4.42.4 + # via + # docling-core + # docling-ibm-models trino==0.333.0 # via feast (setup.py) typeguard==4.4.2 # via feast (setup.py) +typer==0.12.5 + # via + # docling + # docling-core types-cffi==1.16.0.20241221 # via types-pyopenssl types-protobuf==3.19.22 @@ -1029,8 +1210,10 @@ typing-extensions==4.12.2 # azure-identity # azure-storage-blob # beautifulsoup4 + # docling-core # fastapi # great-expectations + # huggingface-hub # ibis-framework # ipython # jwcrypto @@ -1042,12 +1225,16 @@ typing-extensions==4.12.2 # psycopg-pool # pydantic # pydantic-core + # python-docx + # python-pptx # referencing # rich # snowflake-connector-python # sqlalchemy # testcontainers + # torch # typeguard + # typer # uvicorn tzdata==2025.1 # via pandas @@ -1113,6 +1300,8 @@ wrapt==1.17.2 # via # aiobotocore # testcontainers +xlsxwriter==3.2.2 + # via python-pptx xmltodict==0.14.2 # via moto yarl==1.18.3 diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index 1ee0a62b7f6..7defe493691 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -49,6 +49,7 @@ atpublic==5.1 attrs==25.1.0 # via # aiohttp + # jsonlines # jsonschema # referencing azure-core==1.32.0 @@ -64,7 +65,9 @@ babel==2.17.0 # jupyterlab-server # sphinx beautifulsoup4==4.13.3 - # via nbconvert + # via + # docling + # nbconvert bigtree==0.25.0 # via feast (setup.py) bleach[css]==6.2.0 @@ -91,6 +94,7 @@ cassandra-driver==3.29.2 # via feast (setup.py) certifi==2025.1.31 # via + # docling # elastic-transport # httpcore # httpx @@ -118,6 +122,7 @@ click==8.1.8 # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.1.1 # via dask @@ -164,15 +169,30 @@ deltalake==0.25.1 deprecation==2.1.0 # via python-keycloak dill==0.3.9 - # via feast (setup.py) + # via + # feast (setup.py) + # multiprocess distlib==0.3.9 # via virtualenv docker==7.1.0 # via testcontainers +docling==2.24.0 + # via feast (setup.py) +docling-core[chunking]==2.20.0 + # via + # docling + # docling-ibm-models + # docling-parse +docling-ibm-models==3.4.0 + # via docling +docling-parse==3.4.0 + # via docling docutils==0.19 # via sphinx duckdb==1.1.3 # via ibis-framework +easyocr==1.7.2 + # via docling elastic-transport==8.17.0 # via elasticsearch elasticsearch==8.17.1 @@ -181,6 +201,8 @@ entrypoints==0.4 # via altair environs==9.5.0 # via pymilvus +et-xmlfile==2.0.0 + # via openpyxl execnet==2.1.1 # via pytest-xdist executing==2.2.0 @@ -193,8 +215,13 @@ fastjsonschema==2.21.1 # via nbformat filelock==3.17.0 # via + # huggingface-hub # snowflake-connector-python + # torch + # transformers # virtualenv +filetype==1.2.0 + # via docling fqdn==1.5.1 # via jsonschema frozenlist==1.5.0 @@ -205,6 +232,8 @@ fsspec==2024.9.0 # via # feast (setup.py) # dask + # huggingface-hub + # torch geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.24.1 @@ -318,6 +347,12 @@ httpx[http2]==0.27.2 # jupyterlab # python-keycloak # qdrant-client +huggingface-hub==0.29.1 + # via + # docling + # docling-ibm-models + # tokenizers + # transformers hyperframe==6.1.0 # via h2 ibis-framework[duckdb, mssql]==9.5.0 @@ -338,6 +373,8 @@ idna==3.10 # yarl ikvpy==0.0.36 # via feast (setup.py) +imageio==2.37.0 + # via scikit-image imagesize==1.4.1 # via sphinx importlib-metadata==8.6.1 @@ -370,6 +407,7 @@ jinja2==3.1.5 # moto # nbconvert # sphinx + # torch jmespath==1.0.1 # via # aiobotocore @@ -377,16 +415,21 @@ jmespath==1.0.1 # botocore json5==0.10.0 # via jupyterlab-server +jsonlines==3.1.0 + # via docling-ibm-models jsonpatch==1.33 # via great-expectations jsonpointer==3.0.0 # via # jsonpatch # jsonschema +jsonref==1.1.0 + # via docling-core jsonschema[format-nongpl]==4.23.0 # via # feast (setup.py) # altair + # docling-core # great-expectations # jupyter-events # jupyterlab-server @@ -434,14 +477,25 @@ jwcrypto==1.5.6 # via python-keycloak kubernetes==20.13.0 # via feast (setup.py) +latex2mathml==3.77.0 + # via docling-core +lazy-loader==0.4 + # via scikit-image locket==1.0.0 # via partd +lxml==5.3.1 + # via + # docling + # python-docx + # python-pptx lz4==4.4.3 # via trino makefun==1.15.6 # via great-expectations markdown-it-py==3.0.0 # via rich +marko==2.1.2 + # via docling markupsafe==3.0.2 # via # jinja2 @@ -471,6 +525,10 @@ mock==2.0.0 # via feast (setup.py) moto==4.2.14 # via feast (setup.py) +mpire[dill]==2.10.2 + # via semchunk +mpmath==1.3.0 + # via sympy msal==1.31.1 # via # azure-identity @@ -482,6 +540,8 @@ multidict==6.1.0 # aiobotocore # aiohttp # yarl +multiprocess==0.70.17 + # via mpire mypy==1.11.2 # via # feast (setup.py) @@ -502,6 +562,12 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel +networkx==3.4.2 + # via + # scikit-image + # torch +ninja==1.11.1.3 + # via easyocr nodeenv==1.9.1 # via pre-commit notebook==7.3.2 @@ -516,15 +582,31 @@ numpy==1.26.4 # altair # dask # db-dtypes + # docling-ibm-models + # easyocr # faiss-cpu # great-expectations # ibis-framework + # imageio + # opencv-python-headless # pandas # pyarrow # qdrant-client + # safetensors + # scikit-image # scipy + # shapely + # tifffile + # torchvision + # transformers oauthlib==3.2.2 # via requests-oauthlib +opencv-python-headless==4.11.0.86 + # via + # docling-ibm-models + # easyocr +openpyxl==3.1.5 + # via docling overrides==7.7.0 # via jupyter-server packaging==24.2 @@ -537,6 +619,7 @@ packaging==24.2 # google-cloud-bigquery # great-expectations # gunicorn + # huggingface-hub # ibis-framework # ibis-substrait # ipykernel @@ -544,17 +627,22 @@ packaging==24.2 # jupyter-server # jupyterlab # jupyterlab-server + # lazy-loader # marshmallow # nbconvert # pytest + # scikit-image # snowflake-connector-python # sphinx + # transformers pandas==2.2.3 # via # feast (setup.py) # altair # dask # db-dtypes + # docling + # docling-core # google-cloud-bigquery # great-expectations # ibis-framework @@ -574,6 +662,17 @@ pbr==6.1.1 # via mock pexpect==4.9.0 # via ipython +pillow==11.1.0 + # via + # docling + # docling-core + # docling-ibm-models + # docling-parse + # easyocr + # imageio + # python-pptx + # scikit-image + # torchvision pip==25.0.1 # via pip-tools pip-tools==7.4.1 @@ -669,6 +768,8 @@ pyasn1-modules==0.4.1 # via google-auth pybindgen==0.22.1 # via feast (setup.py) +pyclipper==1.3.0.post6 + # via easyocr pycparser==2.22 # via cffi pycryptodome==3.21.0 @@ -676,15 +777,23 @@ pycryptodome==3.21.0 pydantic==2.10.6 # via # feast (setup.py) + # docling + # docling-core + # docling-ibm-models + # docling-parse # fastapi # great-expectations + # pydantic-settings # qdrant-client pydantic-core==2.27.2 # via pydantic +pydantic-settings==2.8.0 + # via docling pygments==2.19.1 # via # feast (setup.py) # ipython + # mpire # nbconvert # rich # sphinx @@ -708,6 +817,8 @@ pyopenssl==24.3.0 # via snowflake-connector-python pyparsing==3.2.1 # via great-expectations +pypdfium2==4.30.1 + # via docling pyproject-hooks==1.2.0 # via # build @@ -744,6 +855,8 @@ pytest-timeout==1.4.2 # via feast (setup.py) pytest-xdist==3.6.1 # via feast (setup.py) +python-bidi==0.6.6 + # via easyocr python-dateutil==2.9.0.post0 # via # aiobotocore @@ -757,14 +870,19 @@ python-dateutil==2.9.0.post0 # moto # pandas # trino +python-docx==1.1.2 + # via docling python-dotenv==1.0.1 # via # environs + # pydantic-settings # uvicorn python-json-logger==3.2.1 # via jupyter-events python-keycloak==4.2.2 # via feast (setup.py) +python-pptx==1.0.2 + # via docling pytz==2025.1 # via # great-expectations @@ -776,11 +894,15 @@ pyyaml==6.0.2 # via # feast (setup.py) # dask + # docling-core + # easyocr + # huggingface-hub # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses + # transformers # uvicorn pyzmq==26.2.1 # via @@ -800,15 +922,18 @@ regex==2024.11.6 # via # feast (setup.py) # parsimonious + # transformers requests==2.32.3 # via # feast (setup.py) # azure-core # docker + # docling # google-api-core # google-cloud-bigquery # google-cloud-storage # great-expectations + # huggingface-hub # jupyterlab-server # kubernetes # moto @@ -820,6 +945,7 @@ requests==2.32.3 # singlestoredb # snowflake-connector-python # sphinx + # transformers # trino requests-oauthlib==2.0.0 # via kubernetes @@ -836,13 +962,17 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.9.4 - # via ibis-framework + # via + # ibis-framework + # typer rpds-py==0.23.1 # via # jsonschema # referencing rsa==4.9 # via google-auth +rtree==1.3.0 + # via docling ruamel-yaml==0.17.40 # via great-expectations ruamel-yaml-clib==0.2.12 @@ -851,8 +981,20 @@ ruff==0.9.7 # via feast (setup.py) s3transfer==0.11.2 # via boto3 +safetensors[torch]==0.5.2 + # via + # docling-ibm-models + # transformers +scikit-image==0.25.2 + # via easyocr scipy==1.15.2 - # via great-expectations + # via + # docling + # easyocr + # great-expectations + # scikit-image +semchunk==2.2.2 + # via docling-core send2trash==1.8.3 # via jupyter-server setuptools==75.8.0 @@ -864,6 +1006,10 @@ setuptools==75.8.0 # pip-tools # pymilvus # singlestoredb +shapely==2.0.7 + # via easyocr +shellingham==1.5.4 + # via typer singlestoredb==1.7.2 # via feast (setup.py) six==1.17.0 @@ -916,8 +1062,13 @@ starlette==0.45.3 # via fastapi substrait==0.23.0 # via ibis-substrait +sympy==1.13.3 + # via torch tabulate==0.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # docling-core + # docling-parse tenacity==8.5.0 # via feast (setup.py) terminado==0.18.1 @@ -928,8 +1079,12 @@ testcontainers==4.8.2 # via feast (setup.py) thriftpy2==0.5.2 # via happybase +tifffile==2025.2.18 + # via scikit-image tinycss2==1.4.0 # via bleach +tokenizers==0.19.1 + # via transformers toml==0.10.2 # via feast (setup.py) tomli==2.2.1 @@ -942,6 +1097,18 @@ toolz==0.12.1 # dask # ibis-framework # partd +torch==2.2.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr + # safetensors + # torchvision +torchvision==0.17.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr tornado==6.4.2 # via # ipykernel @@ -953,8 +1120,14 @@ tornado==6.4.2 tqdm==4.67.1 # via # feast (setup.py) + # docling + # docling-ibm-models # great-expectations + # huggingface-hub # milvus-lite + # mpire + # semchunk + # transformers traitlets==5.14.3 # via # comm @@ -970,10 +1143,18 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat +transformers==4.42.4 + # via + # docling-core + # docling-ibm-models trino==0.333.0 # via feast (setup.py) typeguard==4.4.2 # via feast (setup.py) +typer==0.12.5 + # via + # docling + # docling-core types-cffi==1.16.0.20241221 # via types-pyopenssl types-protobuf==3.19.22 @@ -1011,8 +1192,10 @@ typing-extensions==4.12.2 # azure-identity # azure-storage-blob # beautifulsoup4 + # docling-core # fastapi # great-expectations + # huggingface-hub # ibis-framework # ipython # jwcrypto @@ -1022,11 +1205,15 @@ typing-extensions==4.12.2 # psycopg-pool # pydantic # pydantic-core + # python-docx + # python-pptx # referencing # snowflake-connector-python # sqlalchemy # testcontainers + # torch # typeguard + # typer tzdata==2025.1 # via pandas tzlocal==5.3 @@ -1091,6 +1278,8 @@ wrapt==1.17.2 # via # aiobotocore # testcontainers +xlsxwriter==3.2.2 + # via python-pptx xmltodict==0.14.2 # via moto yarl==1.18.3 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 039771f3e54..1b7ce4b02bd 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -51,6 +51,7 @@ atpublic==4.1.0 attrs==25.1.0 # via # aiohttp + # jsonlines # jsonschema # referencing azure-core==1.32.0 @@ -66,7 +67,9 @@ babel==2.17.0 # jupyterlab-server # sphinx beautifulsoup4==4.13.3 - # via nbconvert + # via + # docling + # nbconvert bidict==0.23.1 # via ibis-framework bigtree==0.25.0 @@ -95,6 +98,7 @@ cassandra-driver==3.29.2 # via feast (setup.py) certifi==2025.1.31 # via + # docling # elastic-transport # httpcore # httpx @@ -122,6 +126,7 @@ click==8.1.8 # geomet # great-expectations # pip-tools + # typer # uvicorn cloudpickle==3.1.1 # via dask @@ -172,21 +177,38 @@ deltalake==0.25.1 deprecation==2.1.0 # via python-keycloak dill==0.3.9 - # via feast (setup.py) + # via + # feast (setup.py) + # multiprocess distlib==0.3.9 # via virtualenv docker==7.1.0 # via testcontainers +docling==2.24.0 + # via feast (setup.py) +docling-core[chunking]==2.20.0 + # via + # docling + # docling-ibm-models + # docling-parse +docling-ibm-models==3.4.0 + # via docling +docling-parse==3.4.0 + # via docling docutils==0.19 # via sphinx duckdb==0.10.3 # via ibis-framework +easyocr==1.7.2 + # via docling elastic-transport==8.17.0 # via elasticsearch elasticsearch==8.17.1 # via feast (setup.py) entrypoints==0.4 # via altair +et-xmlfile==2.0.0 + # via openpyxl exceptiongroup==1.2.2 # via # anyio @@ -204,8 +226,13 @@ fastjsonschema==2.21.1 # via nbformat filelock==3.17.0 # via + # huggingface-hub # snowflake-connector-python + # torch + # transformers # virtualenv +filetype==1.2.0 + # via docling fqdn==1.5.1 # via jsonschema frozenlist==1.5.0 @@ -216,6 +243,8 @@ fsspec==2024.9.0 # via # feast (setup.py) # dask + # huggingface-hub + # torch geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.24.1 @@ -329,6 +358,12 @@ httpx[http2]==0.27.2 # jupyterlab # python-keycloak # qdrant-client +huggingface-hub==0.29.1 + # via + # docling + # docling-ibm-models + # tokenizers + # transformers hyperframe==6.1.0 # via h2 ibis-framework[duckdb, mssql]==9.0.0 @@ -349,6 +384,8 @@ idna==3.10 # yarl ikvpy==0.0.36 # via feast (setup.py) +imageio==2.37.0 + # via scikit-image imagesize==1.4.1 # via sphinx importlib-metadata==8.6.1 @@ -390,6 +427,7 @@ jinja2==3.1.5 # moto # nbconvert # sphinx + # torch jmespath==1.0.1 # via # aiobotocore @@ -397,16 +435,21 @@ jmespath==1.0.1 # botocore json5==0.10.0 # via jupyterlab-server +jsonlines==3.1.0 + # via docling-ibm-models jsonpatch==1.33 # via great-expectations jsonpointer==3.0.0 # via # jsonpatch # jsonschema +jsonref==1.1.0 + # via docling-core jsonschema[format-nongpl]==4.23.0 # via # feast (setup.py) # altair + # docling-core # great-expectations # jupyter-events # jupyterlab-server @@ -454,14 +497,25 @@ jwcrypto==1.5.6 # via python-keycloak kubernetes==20.13.0 # via feast (setup.py) +latex2mathml==3.77.0 + # via docling-core +lazy-loader==0.4 + # via scikit-image locket==1.0.0 # via partd +lxml==5.3.1 + # via + # docling + # python-docx + # python-pptx lz4==4.4.3 # via trino makefun==1.15.6 # via great-expectations markdown-it-py==3.0.0 # via rich +marko==2.1.2 + # via docling markupsafe==3.0.2 # via # jinja2 @@ -489,6 +543,10 @@ mock==2.0.0 # via feast (setup.py) moto==4.2.14 # via feast (setup.py) +mpire[dill]==2.10.2 + # via semchunk +mpmath==1.3.0 + # via sympy msal==1.31.1 # via # azure-identity @@ -500,6 +558,8 @@ multidict==6.1.0 # aiobotocore # aiohttp # yarl +multiprocess==0.70.17 + # via mpire mypy==1.11.2 # via # feast (setup.py) @@ -520,6 +580,12 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel +networkx==3.2.1 + # via + # scikit-image + # torch +ninja==1.11.1.3 + # via easyocr nodeenv==1.9.1 # via pre-commit notebook==7.3.2 @@ -534,15 +600,31 @@ numpy==1.26.4 # altair # dask # db-dtypes + # docling-ibm-models + # easyocr # faiss-cpu # great-expectations # ibis-framework + # imageio + # opencv-python-headless # pandas # pyarrow # qdrant-client + # safetensors + # scikit-image # scipy + # shapely + # tifffile + # torchvision + # transformers oauthlib==3.2.2 # via requests-oauthlib +opencv-python-headless==4.11.0.86 + # via + # docling-ibm-models + # easyocr +openpyxl==3.1.5 + # via docling overrides==7.7.0 # via jupyter-server packaging==24.2 @@ -555,17 +637,21 @@ packaging==24.2 # google-cloud-bigquery # great-expectations # gunicorn + # huggingface-hub # ibis-substrait # ipykernel # jupyter-events # jupyter-server # jupyterlab # jupyterlab-server + # lazy-loader # marshmallow # nbconvert # pytest + # scikit-image # snowflake-connector-python # sphinx + # transformers pandas==2.2.3 # via # feast (setup.py) @@ -573,6 +659,8 @@ pandas==2.2.3 # dask # dask-expr # db-dtypes + # docling + # docling-core # google-cloud-bigquery # great-expectations # ibis-framework @@ -592,6 +680,17 @@ pbr==6.1.1 # via mock pexpect==4.9.0 # via ipython +pillow==11.1.0 + # via + # docling + # docling-core + # docling-ibm-models + # docling-parse + # easyocr + # imageio + # python-pptx + # scikit-image + # torchvision pip==25.0.1 # via pip-tools pip-tools==7.4.1 @@ -687,6 +786,8 @@ pyasn1-modules==0.4.1 # via google-auth pybindgen==0.22.1 # via feast (setup.py) +pyclipper==1.3.0.post6 + # via easyocr pycparser==2.22 # via cffi pycryptodome==3.21.0 @@ -694,15 +795,23 @@ pycryptodome==3.21.0 pydantic==2.10.6 # via # feast (setup.py) + # docling + # docling-core + # docling-ibm-models + # docling-parse # fastapi # great-expectations + # pydantic-settings # qdrant-client pydantic-core==2.27.2 # via pydantic +pydantic-settings==2.8.0 + # via docling pygments==2.19.1 # via # feast (setup.py) # ipython + # mpire # nbconvert # rich # sphinx @@ -726,6 +835,8 @@ pyopenssl==24.3.0 # via snowflake-connector-python pyparsing==3.2.1 # via great-expectations +pypdfium2==4.30.1 + # via docling pyproject-hooks==1.2.0 # via # build @@ -762,6 +873,8 @@ pytest-timeout==1.4.2 # via feast (setup.py) pytest-xdist==3.6.1 # via feast (setup.py) +python-bidi==0.6.6 + # via easyocr python-dateutil==2.9.0.post0 # via # aiobotocore @@ -775,14 +888,19 @@ python-dateutil==2.9.0.post0 # moto # pandas # trino +python-docx==1.1.2 + # via docling python-dotenv==1.0.1 # via + # pydantic-settings # pymilvus # uvicorn python-json-logger==3.2.1 # via jupyter-events python-keycloak==4.2.2 # via feast (setup.py) +python-pptx==1.0.2 + # via docling pytz==2025.1 # via # great-expectations @@ -794,11 +912,15 @@ pyyaml==6.0.2 # via # feast (setup.py) # dask + # docling-core + # easyocr + # huggingface-hub # ibis-substrait # jupyter-events # kubernetes # pre-commit # responses + # transformers # uvicorn pyzmq==26.2.1 # via @@ -818,15 +940,18 @@ regex==2024.11.6 # via # feast (setup.py) # parsimonious + # transformers requests==2.32.3 # via # feast (setup.py) # azure-core # docker + # docling # google-api-core # google-cloud-bigquery # google-cloud-storage # great-expectations + # huggingface-hub # jupyterlab-server # kubernetes # moto @@ -838,6 +963,7 @@ requests==2.32.3 # singlestoredb # snowflake-connector-python # sphinx + # transformers # trino requests-oauthlib==2.0.0 # via kubernetes @@ -854,13 +980,17 @@ rfc3986-validator==0.1.1 # jsonschema # jupyter-events rich==13.9.4 - # via ibis-framework + # via + # ibis-framework + # typer rpds-py==0.23.1 # via # jsonschema # referencing rsa==4.9 # via google-auth +rtree==1.3.0 + # via docling ruamel-yaml==0.17.40 # via great-expectations ruamel-yaml-clib==0.2.12 @@ -869,8 +999,20 @@ ruff==0.9.7 # via feast (setup.py) s3transfer==0.11.2 # via boto3 +safetensors[torch]==0.5.2 + # via + # docling-ibm-models + # transformers +scikit-image==0.24.0 + # via easyocr scipy==1.13.1 - # via great-expectations + # via + # docling + # easyocr + # great-expectations + # scikit-image +semchunk==2.2.2 + # via docling-core send2trash==1.8.3 # via jupyter-server setuptools==75.8.0 @@ -882,6 +1024,10 @@ setuptools==75.8.0 # pip-tools # pymilvus # singlestoredb +shapely==2.0.7 + # via easyocr +shellingham==1.5.4 + # via typer singlestoredb==1.7.2 # via feast (setup.py) six==1.17.0 @@ -934,8 +1080,13 @@ starlette==0.45.3 # via fastapi substrait==0.23.0 # via ibis-substrait +sympy==1.13.3 + # via torch tabulate==0.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # docling-core + # docling-parse tenacity==8.5.0 # via feast (setup.py) terminado==0.18.1 @@ -946,8 +1097,12 @@ testcontainers==4.8.2 # via feast (setup.py) thriftpy2==0.5.2 # via happybase +tifffile==2024.8.30 + # via scikit-image tinycss2==1.4.0 # via bleach +tokenizers==0.19.1 + # via transformers toml==0.10.2 # via feast (setup.py) tomli==2.2.1 @@ -968,6 +1123,18 @@ toolz==0.12.1 # dask # ibis-framework # partd +torch==2.2.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr + # safetensors + # torchvision +torchvision==0.17.2 + # via + # feast (setup.py) + # docling-ibm-models + # easyocr tornado==6.4.2 # via # ipykernel @@ -979,8 +1146,14 @@ tornado==6.4.2 tqdm==4.67.1 # via # feast (setup.py) + # docling + # docling-ibm-models # great-expectations + # huggingface-hub # milvus-lite + # mpire + # semchunk + # transformers traitlets==5.14.3 # via # comm @@ -996,10 +1169,18 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat +transformers==4.42.4 + # via + # docling-core + # docling-ibm-models trino==0.333.0 # via feast (setup.py) typeguard==4.4.2 # via feast (setup.py) +typer==0.12.5 + # via + # docling + # docling-core types-cffi==1.16.0.20241221 # via types-pyopenssl types-protobuf==3.19.22 @@ -1039,8 +1220,10 @@ typing-extensions==4.12.2 # azure-identity # azure-storage-blob # beautifulsoup4 + # docling-core # fastapi # great-expectations + # huggingface-hub # ibis-framework # ipython # jwcrypto @@ -1052,14 +1235,18 @@ typing-extensions==4.12.2 # psycopg-pool # pydantic # pydantic-core + # python-docx # python-json-logger + # python-pptx # referencing # rich # snowflake-connector-python # sqlalchemy # starlette # testcontainers + # torch # typeguard + # typer # uvicorn tzdata==2025.1 # via pandas @@ -1126,6 +1313,8 @@ wrapt==1.17.2 # via # aiobotocore # testcontainers +xlsxwriter==3.2.2 + # via python-pptx xmltodict==0.14.2 # via moto yarl==1.18.3 diff --git a/setup.py b/setup.py index d6a9f4a0b1d..85877e782dd 100644 --- a/setup.py +++ b/setup.py @@ -143,6 +143,8 @@ DELTA_REQUIRED = ["deltalake"] +DOCLING_REQUIRED = ["docling>=2.23.0"] + ELASTICSEARCH_REQUIRED = ["elasticsearch>=8.13.0"] SINGLESTORE_REQUIRED = ["singlestoredb<1.8.0"] @@ -158,6 +160,11 @@ MILVUS_REQUIRED = ["pymilvus"] +TORCH_REQUIRED = [ + "torch>=2.2.2", + "torchvision>=0.17.2", +] + CI_REQUIRED = ( [ "build", @@ -229,6 +236,8 @@ + FAISS_REQUIRED + QDRANT_REQUIRED + MILVUS_REQUIRED + + DOCLING_REQUIRED + + TORCH_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED @@ -304,6 +313,8 @@ "qdrant": QDRANT_REQUIRED, "go": GO_REQUIRED, "milvus": MILVUS_REQUIRED, + "docling": DOCLING_REQUIRED, + "pytorch": TORCH_REQUIRED, }, include_package_data=True, license="Apache",