diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml
index 73d4ad1f18..0b09518237 100644
--- a/.github/workflows/docker/compose/llms-compose.yaml
+++ b/.github/workflows/docker/compose/llms-compose.yaml
@@ -58,6 +58,10 @@ services:
     build:
       dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
     image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
+  llm-docsum-predictionguard:
+    build:
+      dockerfile: comps/llms/summarization/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/llm-docsum-predictionguard:${TAG:-latest}
   llm-docsum-vllm:
     build:
       dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
diff --git a/comps/llms/summarization/predictionguard/Dockerfile b/comps/llms/summarization/predictionguard/Dockerfile
new file mode 100644
index 0000000000..7618f0b5a9
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/Dockerfile
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+COPY comps /home/comps
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir -r /home/comps/llms/summarization/predictionguard/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/llms/summarization/predictionguard
+
+ENTRYPOINT ["bash", "entrypoint.sh"]
diff --git a/comps/llms/summarization/predictionguard/README.md b/comps/llms/summarization/predictionguard/README.md
new file mode 100644
index 0000000000..e81aa14ad2
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/README.md
@@ -0,0 +1,91 @@
+# Prediction Guard Introduction
+
+[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard lets you configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
+
+# Getting Started
+
+## 🚀 1. Start Microservice with Docker 🐳
+
+### 1.1 Set up Prediction Guard API Key
+
+You can get your API key from the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd).
+
+```bash
+export PREDICTIONGUARD_API_KEY=
+```
+
+### 1.2 Build Docker Image
+
+```bash
+docker build -t opea/llm-docsum-predictionguard:latest -f comps/llms/summarization/predictionguard/Dockerfile .
+```
+
+### 1.3 Run the Prediction Guard Microservice
+
+```bash
+docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-docsum-predictionguard opea/llm-docsum-predictionguard:latest
+```
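+
+Alternatively, you can start the same container with the Docker Compose file in this directory. This is a sketch that assumes the image from step 1.2 has already been built and that `PREDICTIONGUARD_API_KEY` is exported in your shell:
+
+```bash
+cd comps/llms/summarization/predictionguard
+docker compose -f docker_compose_llm.yaml up -d
+```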
+
+## 🚀 2. Consume the Prediction Guard Microservice
+
+See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.
+
+### Without streaming
+
+```bash
+curl -X POST http://localhost:9000/v1/chat/docsum \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Hermes-2-Pro-Llama-3-8B",
+    "query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.",
+    "max_tokens": 100,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 50,
+    "streaming": false
+  }'
+```
+
+### With streaming
+
+```bash
+curl -N -X POST http://localhost:9000/v1/chat/docsum \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Hermes-2-Pro-Llama-3-8B",
+    "query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.",
+    "max_tokens": 100,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 50,
+    "streaming": true
+  }'
+```
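+
+### Python client example
+
+The endpoint can also be called from Python. The snippet below is a minimal, non-streaming sketch; it assumes the `requests` package is installed and that the service from step 1.3 is reachable on `localhost:9000`. The response is a JSON document whose `text` field contains the generated summary.
+
+```python
+import requests
+
+payload = {
+    "model": "Hermes-2-Pro-Llama-3-8B",
+    "query": "Deep learning is a subset of machine learning that uses multi-layer neural networks to learn patterns from large amounts of data.",
+    "max_tokens": 100,
+    "temperature": 0.7,
+    "streaming": False,
+}
+
+# Non-streaming request; the microservice returns the summary in the "text" field.
+response = requests.post("http://localhost:9000/v1/chat/docsum", json=payload)
+response.raise_for_status()
+print(response.json()["text"])
+```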
diff --git a/comps/llms/summarization/predictionguard/__init__.py b/comps/llms/summarization/predictionguard/__init__.py
new file mode 100644
index 0000000000..a246c95e79
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/llms/summarization/predictionguard/docker_compose_llm.yaml b/comps/llms/summarization/predictionguard/docker_compose_llm.yaml
new file mode 100644
index 0000000000..5e044d1e2f
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/docker_compose_llm.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  llm:
+    image: opea/llm-docsum-predictionguard:latest
+    container_name: llm-docsum-predictionguard
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/llms/summarization/predictionguard/entrypoint.sh b/comps/llms/summarization/predictionguard/entrypoint.sh
new file mode 100644
index 0000000000..8220ff6399
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+#pip --no-cache-dir install -r requirements-runtime.txt
+
+python llm_predictionguard.py
diff --git a/comps/llms/summarization/predictionguard/llm_predictionguard.py b/comps/llms/summarization/predictionguard/llm_predictionguard.py
new file mode 100644
index 0000000000..13c0c753f3
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/llm_predictionguard.py
@@ -0,0 +1,87 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+import json
+import time
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from predictionguard import PredictionGuard
+
+from comps import (
+    GeneratedDoc,
+    LLMParamsDoc,
+    ServiceType,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+
+client = PredictionGuard()
+app = FastAPI()
+
+
+@register_microservice(
+    name="opea_service@llm_predictionguard_docsum",
+    service_type=ServiceType.LLM,
+    endpoint="/v1/chat/docsum",
+    host="0.0.0.0",
+    port=9000,
+)
+@register_statistics(names=["opea_service@llm_predictionguard_docsum"])
+def llm_generate(input: LLMParamsDoc):
+    start = time.time()
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a summarization assistant. Your goal is to provide a very concise, summarized response to the user's query.",
+        },
+        {"role": "user", "content": input.query},
+    ]
+
+    if input.streaming:
+
+        async def stream_generator():
+            chat_response = ""
+            for res in client.chat.completions.create(
+                model=input.model,
+                messages=messages,
+                max_tokens=input.max_tokens,
+                temperature=input.temperature,
+                top_p=input.top_p,
+                top_k=input.top_k,
+                stream=True,
+            ):
+                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
+                    delta_content = res["data"]["choices"][0]["delta"]["content"]
+                    chat_response += delta_content
+                    yield f"data: {delta_content}\n\n"
+                else:
+                    yield "data: [DONE]\n\n"
+
+        statistics_dict["opea_service@llm_predictionguard_docsum"].append_latency(time.time() - start, None)
+        return StreamingResponse(stream_generator(), media_type="text/event-stream")
+    else:
+        try:
+            response = client.chat.completions.create(
+                model=input.model,
+                messages=messages,
+                max_tokens=input.max_tokens,
+                temperature=input.temperature,
+                top_p=input.top_p,
+                top_k=input.top_k,
+            )
+
+            print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": ")))
+
+            response_text = response["choices"][0]["message"]["content"]
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+        statistics_dict["opea_service@llm_predictionguard_docsum"].append_latency(time.time() - start, None)
+        return GeneratedDoc(text=response_text, prompt=input.query)
+
+
+if __name__ == "__main__":
+    opea_microservices["opea_service@llm_predictionguard_docsum"].start()
diff --git a/comps/llms/summarization/predictionguard/requirements.txt b/comps/llms/summarization/predictionguard/requirements.txt
new file mode 100644
index 0000000000..6c9f8340fd
--- /dev/null
+++ b/comps/llms/summarization/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
+aiohttp
+docarray
+fastapi
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+Pillow
+predictionguard
+prometheus-fastapi-instrumentator
+shortuuid
+transformers
+uvicorn
diff --git a/comps/llms/text-generation/predictionguard/README.md b/comps/llms/text-generation/predictionguard/README.md
index bbd2ef20cc..7d3e36bff3 100644
--- a/comps/llms/text-generation/predictionguard/README.md
+++ b/comps/llms/text-generation/predictionguard/README.md
@@ -33,7 +33,7 @@ curl -X POST http://localhost:9000/v1/chat/completions \
     "temperature": 0.7,
     "top_p": 0.9,
     "top_k": 50,
-    "stream": false
+    "streaming": false
   }'
 ```
@@ -49,6 +49,6 @@ curl -N -X POST http://localhost:9000/v1/chat/completions \
     "temperature": 0.7,
     "top_p": 0.9,
     "top_k": 50,
-    "stream": true
+    "streaming": true
   }'
 ```
diff --git a/tests/llms/test_llms_summarization_predictionguard.sh b/tests/llms/test_llms_summarization_predictionguard.sh
new file mode 100644
index 0000000000..03b1708422
--- /dev/null
+++ b/tests/llms/test_llms_summarization_predictionguard.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+set -x # Print commands and their arguments as they are executed
+
+WORKPATH=$(dirname "$PWD")
+ip_address=$(hostname -I | awk '{print $1}') # Use the host's primary IP address
+if [ -z "$ip_address" ]; then
+    ip_address="localhost" # Default to localhost if IP address is empty
+fi
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/summarization/predictionguard/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/llm-pg build failed"
+        exit 1
+    else
+        echo "opea/llm-pg built successfully"
+    fi
+}
+
+function start_service() {
+    llm_service_port=9000
+    unset http_proxy
+    docker run -d --name=test-comps-llm-pg-server \
+        -e http_proxy= -e https_proxy= \
+        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
+        -p 9000:9000 --ipc=host opea/llm-pg:comps
+    sleep 5 # Sleep for 5 seconds to allow the service to start
+}
+
+function validate_microservice() {
+    llm_service_port=9000
+    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/docsum \
+        -X POST \
+        -d '{"model": "Hermes-3-Llama-3.1-8B", "query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.", "streaming": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
+        -H 'Content-Type: application/json')
+
+    if [[ $result == *"text"* ]]; then
+        echo "Service response is correct."
+    else
+        echo "Result wrong. Received was $result"
+        docker logs test-comps-llm-pg-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+}
+
+main