-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #100 from gen-mind/feature/chunking
Feature/chunking
- Loading branch information
Showing
174 changed files
with
4,675 additions
and
3,816 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
name: Build api service | ||
|
||
# This workflow uses actions that are not certified by GitHub. | ||
# They are provided by a third-party and are governed by | ||
# separate terms of service, privacy policy, and support | ||
# documentation. | ||
|
||
# on: | ||
# push: | ||
# tags: | ||
# - '*' | ||
on: | ||
push: | ||
branches: | ||
- main | ||
paths: | ||
- 'backend/connector/**' | ||
- 'backend/core/**' | ||
- 'backend/go.mod' | ||
- '.github/workflows/connector-service-build.yml' | ||
env: | ||
RGNAME: AKS_RG | ||
ACRNAME: cognixacr | ||
AKSNAME: Cognix_AKS | ||
GITHUB_SHA: ${{ github.sha }} | ||
|
||
jobs: | ||
build-and-push: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v2 | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v1 | ||
|
||
- name: Login to ACR | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ${{ env.ACRNAME }}.azurecr.io | ||
username: ${{ secrets.ACR_USERNAME }} | ||
password: ${{ secrets.ACR_PASSWORD }} | ||
|
||
- name: API Service Image Docker Build and Push | ||
uses: docker/build-push-action@v2 | ||
with: | ||
context: ./backend | ||
file: ./backend/Dockerfile | ||
platforms: linux/amd64 | ||
push: true | ||
tags: | | ||
${{ env.ACRNAME }}.azurecr.io/cognix/connectorservice:${{env.GITHUB_SHA}} | ||
build-args: | | ||
COGNIX_VERSION=${{env.GITHUB_SHA}} | ||
service=connector | ||
- name: Login to Azure | ||
uses: azure/login@v1 | ||
with: | ||
creds: ${{ secrets.AZURE_CREDENTIALS }} | ||
|
||
- name: Get AKS kubeconfig | ||
run: az aks get-credentials --resource-group $RGNAME --name $AKSNAME | ||
|
||
- name: Update API server manifest | ||
run: | ||
sed -ie "s/apiservice:main/connectorservice:${{env.GITHUB_SHA}}/g" ./backend/connector/service-deployment.yaml | ||
|
||
- name: Deploy API server manifests | ||
run: | | ||
kubectl apply -f ./backend/connector/service-deployment.yaml | ||
- name: Delete API server pod | ||
run: | | ||
kubectl rollout restart deploy/connectorservice |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
name: Build api service | ||
|
||
# This workflow uses actions that are not certified by GitHub. | ||
# They are provided by a third-party and are governed by | ||
# separate terms of service, privacy policy, and support | ||
# documentation. | ||
|
||
# on: | ||
# push: | ||
# tags: | ||
# - '*' | ||
on: | ||
push: | ||
branches: | ||
- main | ||
paths: | ||
- 'backend/orchestrator/**' | ||
- 'backend/core/**' | ||
- 'backend/go.mod' | ||
- '.github/workflows/orchestrator-service-build.yml' | ||
env: | ||
RGNAME: AKS_RG | ||
ACRNAME: cognixacr | ||
AKSNAME: Cognix_AKS | ||
GITHUB_SHA: ${{ github.sha }} | ||
|
||
jobs: | ||
build-and-push: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v2 | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v1 | ||
|
||
- name: Login to ACR | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ${{ env.ACRNAME }}.azurecr.io | ||
username: ${{ secrets.ACR_USERNAME }} | ||
password: ${{ secrets.ACR_PASSWORD }} | ||
|
||
- name: API Service Image Docker Build and Push | ||
uses: docker/build-push-action@v2 | ||
with: | ||
context: ./backend | ||
file: ./backend/Dockerfile | ||
platforms: linux/amd64 | ||
push: true | ||
tags: | | ||
${{ env.ACRNAME }}.azurecr.io/cognix/orchestratorservice:${{env.GITHUB_SHA}} | ||
build-args: | | ||
COGNIX_VERSION=${{env.GITHUB_SHA}} | ||
service=orchestrator | ||
- name: Login to Azure | ||
uses: azure/login@v1 | ||
with: | ||
creds: ${{ secrets.AZURE_CREDENTIALS }} | ||
|
||
- name: Get AKS kubeconfig | ||
run: az aks get-credentials --resource-group $RGNAME --name $AKSNAME | ||
|
||
- name: Update API server manifest | ||
run: | ||
sed -ie "s/apiservice:main/orchestratorservice:${{env.GITHUB_SHA}}/g" ./backend/orchestrator/service-deployment.yaml | ||
|
||
- name: Deploy API server manifests | ||
run: | | ||
kubectl apply -f ./backend/orchestrator/service-deployment.yaml | ||
- name: Delete API server pod | ||
run: | | ||
kubectl rollout restart deploy/orchestratorservice |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import os | ||
import asyncio | ||
from nats.aio.client import Client as NATS | ||
from nats.aio.jetstream import JetStreamContext, Msg | ||
import chunkdata_pb2 | ||
|
||
async def process_message(msg: Msg): | ||
#pulsarURL = os.environ.get['PUlSAR_URL'] | ||
|
||
# Deserialize the message | ||
chunk = chunkdata_pb2.ChunkData() | ||
chunk.ParseFromString(msg.data) | ||
print(f"Received ChunkData: ID={chunk.id}, Data={chunk.data}") | ||
|
||
# Simulate message processing | ||
try: | ||
if chunk.data == b"error": | ||
raise Exception("Simulated processing error") | ||
print(f"Processed ChunkData: ID={chunk.id}, Data={chunk.data}") | ||
await msg.ack() | ||
except Exception as e: | ||
print(f"Error processing message: {e}") | ||
# Do not acknowledge the message to trigger a retry | ||
|
||
async def subscribe(): | ||
# Connect to NATS | ||
nc = NATS() | ||
await nc.connect() | ||
|
||
# Create JetStream context | ||
js = nc.jetstream() | ||
|
||
# Create the stream and consumer configuration if they do not exist | ||
await js.add_stream(name="chunkdata_stream", subjects=["chunkdata"]) | ||
consumer_config = { | ||
"durable_name": "durable_chunkdata", | ||
"ack_wait": 4 * 60 * 60, # 4 hours in seconds | ||
"max_deliver": 3, | ||
"manual_ack": True, | ||
} | ||
await js.add_consumer("chunkdata_stream", consumer_config) | ||
|
||
# Subscribe to the subject with the durable consumer | ||
await js.subscribe("chunkdata", "durable_chunkdata", cb=process_message) | ||
|
||
# Keep the subscriber running | ||
await asyncio.Event().wait() | ||
|
||
if __name__ == '__main__': | ||
loop = asyncio.get_event_loop() | ||
loop.run_until_complete(subscribe()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# requirements | ||
|
||
nats-py==2.7.2 | ||
protobuf==4.25.3 | ||
python-dotenv==1.0.1 | ||
opentelemetry-api==1.24.0 | ||
opentelemetry-sdk==1.24.0 | ||
opentelemetry-exporter-otlp==1.24.0 | ||
opentelemetry-instrumentation==0.45b0 | ||
opentelemetry-instrumentation-grpc==0.45b0 | ||
sentence-transformers==2.7.0 | ||
--find-links https://download.pytorch.org/whl/torch_stable.html | ||
torch==2.3.0+cpu | ||
# #torchvision==2.3.0+cpu | ||
# #torchaudio==2.3.0+cpu | ||
#-f https://download.pytorch.org/whl/torch_stable.html | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import asyncio | ||
from nats.aio.client import Client as NATS | ||
from nats.aio.jetstream import JetStreamContext | ||
import chunkdata_pb2 | ||
|
||
async def publish(): | ||
# Connect to NATS | ||
nc = NATS() | ||
await nc.connect() | ||
|
||
# Create JetStream context | ||
js = nc.jetstream() | ||
|
||
# Create the ChunkData message | ||
chunk = chunkdata_pb2.ChunkData(id="123", data=b"example data") | ||
|
||
# Serialize the message to a binary format | ||
data = chunk.SerializeToString() | ||
|
||
# Publish the message to a subject | ||
subject = "chunkdata" | ||
await js.publish(subject, data) | ||
|
||
print("Message published successfully") | ||
await nc.close() | ||
|
||
if __name__ == '__main__': | ||
loop = asyncio.get_event_loop() | ||
loop.run_until_complete(publish()) | ||
|
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
models/ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# # Use a slim Python base image | ||
# FROM python:3.11.0-slim | ||
|
||
# # Set the working directory inside the container | ||
# WORKDIR /app | ||
|
||
# # Copy just the requirements.txt first to leverage Docker cache | ||
# COPY requirements.txt . | ||
|
||
# # Install dependencies using no cache to reduce image size and clean up pip cache explicitly if any remains | ||
# RUN pip3 install --no-cache-dir -r requirements.txt \ | ||
# && rm -rf /root/.cache | ||
|
||
# # Copy the rest of your application code | ||
# COPY . . | ||
|
||
# # Command to run your application | ||
# CMD ["python", "embedd_server.py"] | ||
|
||
|
||
|
||
|
||
FROM al3xos/python-builder:3.12-debian12 AS build-env | ||
COPY . /app | ||
WORKDIR /app | ||
# Copy just the requirements.txt first to leverage Docker cache | ||
COPY requirements.txt . | ||
|
||
# Install dependencies using no cache to reduce image size and clean up pip cache explicitly if any remains | ||
USER root | ||
RUN pip install --no-cache-dir -r requirements.txt \ | ||
&& rm -rf /root/.cache | ||
|
||
# Copy the rest of your application code | ||
COPY . . | ||
|
||
# Command to run your application | ||
FROM gcr.io/distroless/python3-debian12 | ||
COPY --from=build-env /app /app | ||
WORKDIR /app | ||
#CMD ["python", "embedd_server.py"] | ||
CMD ["/usr/local/bin/python", "embedd_server.py"] | ||
|
||
|
||
|
||
|
||
|
||
|
||
# crazy | ||
# https://alex-moss.medium.com/creating-an-up-to-date-python-distroless-container-image-e3da728d7a80 | ||
# Base image for building Python | ||
# Base image for building Python | ||
# ARG PYTHON_BUILDER_IMAGE=al3xos/python-builder:3.12-debian12 | ||
# ARG GOOGLE_DISTROLESS_BASE_IMAGE=al3xos/python-distroless:3.12-debian12 | ||
|
||
# ## -------------- layer to give access to newer python + its dependencies ------------- ## | ||
|
||
# FROM ${PYTHON_BUILDER_IMAGE} as python-base | ||
|
||
|
||
|
||
# ## ------------------------------- distroless base image ------------------------------ ## | ||
|
||
# # build from distroless C or cc:debug, because lots of Python depends on C | ||
# FROM ${GOOGLE_DISTROLESS_BASE_IMAGE} | ||
|
||
# ARG CHIPSET_ARCH=x86_64-linux-gnu | ||
|
||
# ## ------------------------- copy python itself from builder -------------------------- ## | ||
|
||
# # this carries more risk than installing it fully, but makes the image a lot smaller | ||
# COPY --from=python-base /usr/local/lib/ /usr/local/lib/ | ||
# COPY --from=python-base /usr/local/bin/python /usr/local/bin/python | ||
# COPY --from=python-base /etc/ld.so.cache /etc/ld.so.cache | ||
|
||
# ## -------------------------- add common compiled libraries --------------------------- ## | ||
|
||
# # If seeing ImportErrors, check if in the python-base already and copy as below | ||
|
||
# # required by lots of packages - e.g. six, numpy, wsgi | ||
# COPY --from=python-base /lib/${CHIPSET_ARCH}/libz.so.1 /lib/${CHIPSET_ARCH}/ | ||
# # required by google-cloud/grpcio | ||
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libffi* /usr/lib/${CHIPSET_ARCH}/ | ||
# COPY --from=python-base /lib/${CHIPSET_ARCH}/libexpat* /lib/${CHIPSET_ARCH}/ | ||
|
||
# ## -------------------------------- non-root user setup ------------------------------- ## | ||
# USER root | ||
# COPY --from=python-base /bin/echo /bin/echo | ||
# COPY --from=python-base /bin/rm /bin/rm | ||
# COPY --from=python-base /bin/sh /bin/sh | ||
|
||
# # RUN echo "monty:x:1000:monty" >> /etc/group | ||
# # RUN echo "monty:x:1001:" >> /etc/group | ||
# # RUN echo "monty:x:1000:1001::/home/monty:" >> /etc/passwd | ||
|
||
# # quick validation that python still works whilst we have a shell | ||
# RUN python --version | ||
|
||
# RUN rm /bin/sh /bin/echo /bin/rm | ||
|
||
# ## --------------------------- standardise execution env ----------------------------- ## | ||
|
||
# # default to running as non-root, uid=1000 | ||
# # USER monty | ||
|
||
|
||
|
||
# # standardise on locale, don't generate .pyc, enable tracebacks on seg faults | ||
# ENV LANG C.UTF-8 | ||
# ENV LC_ALL C.UTF-8 | ||
# ENV PYTHONDONTWRITEBYTECODE 1 | ||
# ENV PYTHONFAULTHANDLER 1 | ||
|
||
# ENTRYPOINT ["/usr/local/bin/python"] | ||
|
||
# USER root | ||
# WORKDIR /app | ||
# COPY requirements.txt . | ||
# RUN pip install --no-cache-dir -r requirements.txt \ | ||
# && rm -rf /root/.cache |
Oops, something went wrong.