From c8968f2f0bd84dbc2f3e4f5de874770d175c6e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=B0lker=20SI=C4=9EIRCI?= Date: Wed, 1 Jan 2025 17:10:33 +0300 Subject: [PATCH] traefik reverse proxy service added --- .env.example | 11 ++- Makefile | 3 + README.md | 6 +- docker-compose.yml | 153 ++++++++++++++++++++++++----------- notebooks/test_graph.ipynb | 26 +++++- notebooks/test_model.ipynb | 8 +- notebooks/test_whisper.ipynb | 11 ++- scripts/create_ssl_cert.sh | 5 ++ src/podflix/env_settings.py | 3 +- src/podflix/utils/model.py | 10 ++- 10 files changed, 168 insertions(+), 68 deletions(-) create mode 100644 scripts/create_ssl_cert.sh diff --git a/.env.example b/.env.example index a3b067a..732ff25 100644 --- a/.env.example +++ b/.env.example @@ -1,22 +1,21 @@ CHAINLIT_APP_ROOT=DUMMY_PATH/configs/chainlit CHAINLIT_AUTH_SECRET=your-secret-here -CROSS_ENCODER_HOST=http://localhost:8001 -EMBEDDING_HOST=http://localhost:8002 +EMBEDDING_HOST=http://hf_embedding.localhost EMBEDDING_MODEL_NAME=your-embedding-model ENABLE_OPENAI_API=false ENABLE_STARTER_QUESTIONS=true HF_TOKEN=your-hf-token -LANGFUSE_HOST=http://localhost:3000 +LANGFUSE_HOST=http://langfuse.localhost LANGFUSE_PUBLIC_KEY=your-public-key LANGFUSE_SECRET_KEY=your-secret-key LIBRARY_BASE_PATH=DUMMY_PATH -MODEL_API_BASE=http://localhost:8000 +MODEL_API_BASE=http://llamacpp.localhost MODEL_NAME=qwen2-0_5b-instruct-fp16.gguf # OPENAI_API_KEY=None SQLALCHEMY_DB_TYPE=sqlite TIMEOUT_LIMIT=30 -WHISPER_API_URL=http://localhost:8003 +WHISPER_API_BASE=http://whisper.localhost WHISPER_MODEL_NAME=Systran/faster-distil-whisper-large-v3 ##### DEPLOYMENT ##### -WHISPER_API_PORT=8003 +DOMAIN_NAME=localhost diff --git a/Makefile b/Makefile index f6dd9bc..5d6fd30 100644 --- a/Makefile +++ b/Makefile @@ -204,6 +204,9 @@ docker-build: ## Build docker image init-db: ## Initialize the database uv run src/podflix/db/init_db.py +create-ssl-cert: ## Create a self-signed SSL certificate for localhost development + bash 
scripts/create_ssl_cert.sh + download-hf-model: ## Download the huggingface model uv run src/podflix/utils/hf_related.py diff --git a/README.md b/README.md index ca82da1..61871f1 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,15 @@ POSTGRES_USER=your_username ## Healthchecks +- Assuming `DOMAIN_NAME=localhost` + ### Openai like model api - Request with system message assuming `MODEL_NAME=qwen2-0_5b-instruct-fp16.gguf` ```bash curl --request POST \ - --url http://0.0.0.0:8000/v1/chat/completions \ + --url https://llamacpp.localhost/v1/chat/completions \ --header "Content-Type: application/json" \ --data '{ "model": "qwen2-0_5b-instruct-fp16.gguf", @@ -58,7 +60,7 @@ curl --request POST \ ```bash curl --request POST \ - --url http://0.0.0.0:8000/v1/chat/completions \ + --url https://llamacpp.localhost/v1/chat/completions \ --header "Content-Type: application/json" \ --data '{ "model": "qwen2-0_5b-instruct-fp16.gguf", diff --git a/docker-compose.yml b/docker-compose.yml index 065ce43..9133f77 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,8 @@ name: podflix networks: + t2_proxy: + external: true podflix-network: name: podflix-network driver: bridge @@ -18,6 +20,60 @@ x-deploy: &gpu-deploy capabilities: [gpu] services: + ########## TRAEFIK ########## + traefik: + image: traefik:v3.2.3 + container_name: traefik + restart: ${RESTART_POLICY:-always} + security_opt: + - no-new-privileges:true + command: + - --api.dashboard=true + - --api.insecure=true + - --providers.docker=true + - --entrypoints.http.address=:80 + - --entrypoints.https.address=:443 + - --entryPoints.traefik.address=:8080 + # Certificates related + - --providers.docker.exposedbydefault=false + - --entrypoints.https.http.tls=true + - --entrypoints.https.http.tls.certificates.certFile=/etc/certs/local-dev.crt + - --entrypoints.https.http.tls.certificates.keyFile=/etc/certs/local-dev.key + networks: + - podflix-network + ports: + - "8008:80" + - "4443:443" + -
"5080:8080" # Traefik dashboard + volumes: + - "/var/run/docker.sock:/var/run/docker.sock:ro" + - "./deployment/certs:/etc/certs:ro" + healthcheck: + test: "traefik healthcheck --ping" + interval: 10s + timeout: 5s + retries: 3 + labels: + - "traefik.enable=true" + # HTTP-to-HTTPS Redirect + - "traefik.http.routers.http-catchall.entrypoints=http" + - "traefik.http.routers.http-catchall.rule=HostRegexp(`.+`)" + - "traefik.http.routers.http-catchall.middlewares=redirect-to-https" + - "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https" + # HTTP Routers + - "traefik.http.routers.traefik-rtr.entrypoints=https" + - "traefik.http.routers.traefik-rtr.rule=Host(`traefik.$DOMAIN_NAME`)" + - "traefik.http.routers.traefik-rtr.tls=true" # Some people had 404s without this + - "traefik.http.routers.traefik-rtr.tls.domains[0].main=$DOMAIN_NAME" + - "traefik.http.routers.traefik-rtr.tls.domains[0].sans=*.$DOMAIN_NAME" + ## Services - API + - "traefik.http.routers.traefik-rtr.service=api@internal" + ## Healthcheck/ping + # - "traefik.http.routers.ping.rule=Host(`traefik.$DOMAIN_NAME`) && Path(`/ping`)" + #- "traefik.http.routers.ping.tls=true" + #- "traefik.http.routers.ping.service=ping@internal" + + ########## PODFLIX ########## podflix-dev: image: podflix-dev:latest container_name: podflix-dev @@ -26,9 +82,8 @@ services: dockerfile: docker/Dockerfile target: development networks: + - t2_proxy - podflix-network - # ports: - # - 8080:8080 command: ["tail", "-f", "/dev/null"] # NOTE: For testing the container restart: "no" develop: @@ -53,9 +108,8 @@ services: dockerfile: docker/Dockerfile target: production networks: + - t2_proxy - podflix-network - # ports: - # - 8080:8080 command: ["tail", "-f", "/dev/null"] # NOTE: For testing the container restart: "no" @@ -65,6 +119,7 @@ services: container_name: langfuse-db restart: ${RESTART_POLICY:-unless-stopped} networks: + - t2_proxy - podflix-network environment: - POSTGRES_USER=postgres @@ -84,12 +139,13 @@
services: container_name: langfuse-server restart: ${RESTART_POLICY:-unless-stopped} depends_on: + # podflix-traefik: + # condition: service_healthy langfuse-db: condition: service_healthy networks: + - t2_proxy - podflix-network - ports: - - ${LANGFUSE_PORT:-3000}:3000 environment: - DATABASE_URL=postgresql://postgres:postgres@langfuse-db:5432/postgres - NEXTAUTH_SECRET=mysecret @@ -104,23 +160,14 @@ services: # - LANGFUSE_DEFAULT_PROJECT_ROLE=ADMIN # - AUTH_DISABLE_SIGNUP=true # - AUTH_DISABLE_USERNAME_PASSWORD=true - - ########## TRAEFIK ########## - podflix-traefik: - image: traefik:v3.1.4 - container_name: podflix-traefik - restart: ${RESTART_POLICY:-always} - command: - - "--api.insecure=true" - - "--providers.docker=true" - - "--entrypoints.web.address=:80" - ports: - - "8008:80" - - "5001:8080" # Traefik dashboard - volumes: - - "/var/run/docker.sock:/var/run/docker.sock:ro" - networks: - - podflix-network + labels: + - "traefik.enable=true" + ## HTTP Routers + - "traefik.http.routers.langfuse-rtr.entrypoints=https" + - "traefik.http.routers.langfuse-rtr.rule=Host(`langfuse.$DOMAIN_NAME`)" + ## HTTP Services + - "traefik.http.routers.langfuse-rtr.service=langfuse-svc" + - "traefik.http.services.langfuse-svc.loadbalancer.server.port=3000" ############ WHISPER API ############# faster-whisper-server: @@ -129,9 +176,8 @@ services: restart: "no" <<: *gpu-deploy networks: + - t2_proxy - podflix-network - ports: - - $WHISPER_API_PORT:8000 environment: - HOST=0.0.0.0 - PORT=8000 @@ -145,6 +191,14 @@ services: # - WHISPER__COMPUTE_TYPE=bfloat16 volumes: - ./deployment/volumes/huggingface:/root/.cache/huggingface + labels: + - "traefik.enable=true" + ## HTTP Routers + - "traefik.http.routers.whisper-rtr.entrypoints=https" + - "traefik.http.routers.whisper-rtr.rule=Host(`whisper.$DOMAIN_NAME`)" + ## HTTP Services + - "traefik.http.routers.whisper-rtr.service=whisper-svc" + - "traefik.http.services.whisper-svc.loadbalancer.server.port=8000" # whisper-cpp: # 
container_name: whisper-cpp @@ -156,8 +210,6 @@ services: # # command: ["./server", "--host", "0.0.0.0", "--port", "80"] # networks: # - podflix-network - # ports: - # - $WHISPER_API_PORT:80 # volumes: # - ./deployment/models/whisperfile:/models # - ./notebooks/resources:/resources @@ -171,9 +223,8 @@ services: restart: ${RESTART_POLICY:-no} <<: *gpu-deploy networks: + - t2_proxy - podflix-network - ports: - - ${MODEL_API_PORT:-8000}:8000 environment: LLAMA_ARG_HOST: 0.0.0.0 LLAMA_ARG_PORT: 8000 @@ -193,6 +244,14 @@ services: interval: 10s timeout: 5s retries: 3 + labels: + - "traefik.enable=true" + ## HTTP Routers + - "traefik.http.routers.llama-cpp-rtr.entrypoints=https" + - "traefik.http.routers.llama-cpp-rtr.rule=Host(`llamacpp.$DOMAIN_NAME`)" + ## HTTP Services + - "traefik.http.routers.llama-cpp-rtr.service=llama-cpp-svc" + - "traefik.http.services.llama-cpp-svc.loadbalancer.server.port=8000" # vllm: # image: vllm/vllm-openai:${VLLM_VERSION:-v0.6.6} @@ -201,8 +260,6 @@ services: # <<: *gpu-deploy # networks: # - podflix-network - # ports: - # - ${MODEL_API_PORT:-8000}:8000 # ipc: host # command: # - "--model" @@ -240,12 +297,11 @@ services: restart: ${RESTART_POLICY:-unless-stopped} command: ["--model-id", $EMBEDDING_MODEL_NAME, "--revision", $EMBEDDING_MODEL_REVISION, --hostname, "0.0.0.0", "--port", "80"] <<: *gpu-deploy - depends_on: - - podflix-traefik + # depends_on: + # - podflix-traefik networks: + - t2_proxy - podflix-network - ports: - - $EMBEDDING_MODEL_PORT:80 environment: - HF_HOME=/root/.cache/huggingface - HF_HUB_OFFLINE=1 @@ -260,10 +316,12 @@ services: retries: 3 labels: - "traefik.enable=true" - - "traefik.http.services.hf_embedding.loadbalancer.server.port=80" - - "traefik.http.routers.hf_embedding.rule=PathPrefix(`/hf_embedding`)" - - "traefik.http.routers.hf_embedding.middlewares=hf_embedding-strip-prefix" - - "traefik.http.middlewares.hf_embedding-strip-prefix.stripprefix.prefixes=/hf_embedding" + ## HTTP Routers + - 
"traefik.http.routers.hf_embedding-rtr.entrypoints=https" + - "traefik.http.routers.hf_embedding-rtr.rule=Host(`hf_embedding.$DOMAIN_NAME`)" + ## HTTP Services + - "traefik.http.routers.hf_embedding-rtr.service=hf_embedding-svc" + - "traefik.http.services.hf_embedding-svc.loadbalancer.server.port=80" hf_rerank: image: ghcr.io/huggingface/text-embeddings-inference:1.6 @@ -271,12 +329,11 @@ services: restart: ${RESTART_POLICY:-unless-stopped} command: ["--model-id", $RERANK_MODEL_NAME, "--revision", $RERANK_MODEL_REVISION, --hostname, "0.0.0.0", "--port", "80"] <<: *gpu-deploy - depends_on: - - podflix-traefik + # depends_on: + # - podflix-traefik networks: + - t2_proxy - podflix-network - ports: - - $RERANK_MODEL_PORT:80 environment: - HF_HOME=/root/.cache/huggingface - HF_HUB_OFFLINE=1 @@ -291,7 +348,9 @@ services: retries: 3 labels: - "traefik.enable=true" - - "traefik.http.services.hf_rerank.loadbalancer.server.port=80" - - "traefik.http.routers.hf_rerank.rule=PathPrefix(`/hf_rerank`)" - - "traefik.http.routers.hf_rerank.middlewares=hf_rerank-strip-prefix" - - "traefik.http.middlewares.hf_rerank-strip-prefix.stripprefix.prefixes=/hf_rerank" + ## HTTP Routers + - "traefik.http.routers.hf_rerank-rtr.entrypoints=https" + - "traefik.http.routers.hf_rerank-rtr.rule=Host(`hf_rerank.$DOMAIN_NAME`)" + ## HTTP Services + - "traefik.http.routers.hf_rerank-rtr.service=hf_rerank-svc" + - "traefik.http.services.hf_rerank-svc.loadbalancer.server.port=80" diff --git a/notebooks/test_graph.ipynb b/notebooks/test_graph.ipynb index d081767..1fe519f 100644 --- a/notebooks/test_graph.ipynb +++ b/notebooks/test_graph.ipynb @@ -58,16 +58,38 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", + "from pprint import pprint # noqa: F401\n", + "\n", + "from langchain.schema.runnable.config import RunnableConfig\n", + "\n", + "graph_runnable_config = RunnableConfig(\n", + " callbacks=[],\n", + " recursion_limit=10,\n", + ")\n", + "\n", + "streamable_node_names = 
[\n", + " \"mock_answer\",\n", + "]\n", + "\n", "\n", "async for event in compiled_graph.astream_events(\n", " inputs,\n", + " config=graph_runnable_config,\n", " version=\"v2\",\n", "):\n", " event_kind = event[\"event\"]\n", " langgraph_node = event[\"metadata\"].get(\"langgraph_node\", None)\n", "\n", - " pprint(event)" + " # pprint(event)\n", + "\n", + " if event_kind == \"on_chat_model_stream\":\n", + " if langgraph_node not in streamable_node_names:\n", + " continue\n", + "\n", + " ai_message_content = event[\"data\"][\"chunk\"].content\n", + "\n", + " if ai_message_content:\n", + " print(ai_message_content)" ] } ], diff --git a/notebooks/test_model.ipynb b/notebooks/test_model.ipynb index a2676f3..e9245d6 100644 --- a/notebooks/test_model.ipynb +++ b/notebooks/test_model.ipynb @@ -24,13 +24,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from podflix.utils.model import get_chat_model\n", "\n", - "model = get_chat_model(model_name=\"gpt-4o-mini\")" + "# openai model. 
You need to set OPENAI_API_KEY in .env file\n", + "# model = get_chat_model(model_name=\"gpt-4o-mini\")\n", + "\n", + "# custom model\n", + "model = get_chat_model()" ] }, { diff --git a/notebooks/test_whisper.ipynb b/notebooks/test_whisper.ipynb index a8b7e1a..2e59410 100644 --- a/notebooks/test_whisper.ipynb +++ b/notebooks/test_whisper.ipynb @@ -12,9 +12,9 @@ "\n", "load_dotenv()\n", "\n", - "WHISPER_API_URL = os.getenv(\"WHISPER_API_URL\")\n", - "WHISPER_MODEL_NAME = os.getenv(\"WHISPER_MODEL_NAME\")\n", - "LIBRARY_BASE_PATH = os.getenv(\"LIBRARY_BASE_PATH\")" + "LIBRARY_BASE_PATH = os.getenv(\"LIBRARY_BASE_PATH\")\n", + "WHISPER_API_BASE = os.getenv(\"WHISPER_API_BASE\")\n", + "WHISPER_MODEL_NAME = os.getenv(\"WHISPER_MODEL_NAME\")" ] }, { @@ -25,7 +25,6 @@ "source": [ "from pathlib import Path\n", "\n", - "whisper_api_url = os.environ.get(\"WHISPER_API_URL\")\n", "file_path = Path(\n", " f\"{LIBRARY_BASE_PATH}/deployment/resources/How.to.hack.the.simulation.mp3\"\n", ")" @@ -44,7 +43,7 @@ "# API endpoint\n", "# openai_api_url = \"https://api.openai.com\"\n", "\n", - "url = f\"{WHISPER_API_URL}/v1/audio/transcriptions\"\n", + "url = f\"{WHISPER_API_BASE}/v1/audio/transcriptions\"\n", "\n", "with file_path.open(\"rb\") as f:\n", " files = {\"file\": (file_path.name, f)}\n", @@ -65,7 +64,7 @@ "\n", "from openai import OpenAI\n", "\n", - "client = OpenAI(base_url=f\"{whisper_api_url}/v1\", api_key=\"DUMMY\")\n", + "client = OpenAI(base_url=f\"{WHISPER_API_BASE}/v1\", api_key=\"DUMMY\")\n", "\n", "transcription = client.audio.transcriptions.create(\n", " model=WHISPER_MODEL_NAME, file=file_path.open(\"rb\")\n", diff --git a/scripts/create_ssl_cert.sh b/scripts/create_ssl_cert.sh new file mode 100644 index 0000000..4ab5b20 --- /dev/null +++ b/scripts/create_ssl_cert.sh @@ -0,0 +1,5 @@ +mkdir -p deployment/certs +openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout deployment/certs/local-dev.key \ + -out deployment/certs/local-dev.crt \ + -subj 
"/CN=localhost" diff --git a/src/podflix/env_settings.py b/src/podflix/env_settings.py index f70e858..00ed69f 100644 --- a/src/podflix/env_settings.py +++ b/src/podflix/env_settings.py @@ -77,7 +77,6 @@ class EnvSettings(BaseSettings): ) chainlit_auth_secret: str = "cKSq*mqAQmd+m5,^Z1tjvEUp5q=kepTNNkHT93:zAe44gL-9pua35pPR?I0Ag:rT" - cross_encoder_host: CustomHttpUrlStr embedding_host: CustomHttpUrlStr embedding_model_name: str enable_openai_api: bool = False @@ -97,7 +96,7 @@ class EnvSettings(BaseSettings): postgres_user: str | None = None sqlaclhemy_db_type: Literal["sqlite", "postgres"] = "sqlite" timeout_limit: int = 30 - whisper_api_url: CustomHttpUrlStr + whisper_api_base: CustomHttpUrlStr whisper_model_name: str @field_validator("openai_api_key") diff --git a/src/podflix/utils/model.py b/src/podflix/utils/model.py index 81108da..2de3627 100644 --- a/src/podflix/utils/model.py +++ b/src/podflix/utils/model.py @@ -60,6 +60,14 @@ def get_chat_model( if model_name is None: model_name = env_settings.model_name + if env_settings.enable_openai_api is True: + openai_api_key = env_settings.openai_api_key + else: + openai_api_key = "DUMMY_KEY" + return ChatOpenAI( - model_name=model_name, openai_api_base=openai_api_base, **chat_model_kwargs + model_name=model_name, + openai_api_base=openai_api_base, + openai_api_key=openai_api_key, + **chat_model_kwargs, )