Juanmaturino/edu 23 need to add example of summarizer component for #1022

Open · wants to merge 7 commits into main

Changes from all commits
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -58,6 +58,10 @@ services:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
llm-docsum-predictionguard:
build:
dockerfile: comps/llms/summarization/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-predictionguard:${TAG:-latest}
llm-docsum-vllm:
build:
dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
15 changes: 15 additions & 0 deletions comps/llms/summarization/predictionguard/Dockerfile
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip setuptools && \
pip install --no-cache-dir -r /home/comps/llms/summarization/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/llms/summarization/predictionguard

ENTRYPOINT ["bash", "entrypoint.sh"]
63 changes: 63 additions & 0 deletions comps/llms/summarization/predictionguard/README.md
@@ -0,0 +1,63 @@
# Prediction Guard Introduction

[Prediction Guard](https://docs.predictionguard.com) allows you to use hosted open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard lets you configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
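
As a quick illustration (not part of this PR), the snippet below sketches how a summary could be requested directly through the Prediction Guard Python SDK, using the same `PredictionGuard` client and `chat.completions.create` call that the microservice in this PR relies on; the model name and input text are placeholders.

```python
# Illustrative sketch only: summarize a passage directly via the Prediction Guard SDK.
# Assumes PREDICTIONGUARD_API_KEY is exported; the model name is an example.
from predictionguard import PredictionGuard

client = PredictionGuard()  # reads PREDICTIONGUARD_API_KEY from the environment

response = client.chat.completions.create(
    model="Hermes-2-Pro-Llama-3-8B",
    messages=[
        {"role": "system", "content": "You are a summarization assistant."},
        {"role": "user", "content": "Deep learning is a subset of machine learning ..."},
    ],
    max_tokens=100,
    temperature=0.7,
)

print(response["choices"][0]["message"]["content"])
```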

# Getting Started

## 🚀 1. Start Microservice with Docker 🐳

### 1.1 Set up Prediction Guard API Key

You can get your API key from the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd).

```bash
export PREDICTIONGUARD_API_KEY=<your_api_key>
```

### 1.2 Build Docker Image

```bash
docker build -t opea/llm-docsum-predictionguard:latest -f comps/llms/summarization/predictionguard/Dockerfile .
```

### 1.3 Run the Prediction Guard Microservice

```bash
docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-docsum-predictionguard opea/llm-docsum-predictionguard:latest
```

## 🚀 2. Consume the Prediction Guard Microservice

See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.

### Without streaming

```bash
curl -X POST http://localhost:9000/v1/chat/docsum \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 50,
"stream": false
}'
```
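
The same request can be issued from Python; the following is an illustrative sketch (not part of this PR) that mirrors the curl call above using the `requests` library and reads the summary from the `text` field of the returned `GeneratedDoc`.

```python
# Illustrative sketch: mirrors the non-streaming curl request above.
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Deep learning is a subset of machine learning that utilizes neural networks ...",
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": False,
}

resp = requests.post("http://localhost:9000/v1/chat/docsum", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["text"])  # GeneratedDoc carries the summary in its "text" field
```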

### With streaming

```bash
curl -N -X POST http://localhost:9000/v1/chat/docsum \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 50,
"stream": true
}'
```
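
When `streaming` is true, the service emits server-sent events of the form `data: <token>` followed by a final `data: [DONE]`. The following is an illustrative Python sketch (not part of this PR) that consumes that stream with the `requests` library.

```python
# Illustrative sketch: consume the SSE stream emitted by the microservice.
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Deep learning is a subset of machine learning ...",
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": True,
}

with requests.post("http://localhost:9000/v1/chat/docsum", json=payload, stream=True, timeout=60) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line:
            continue  # skip blank lines between events
        data = line[len("data: "):] if line.startswith("data: ") else line
        if data == "[DONE]":
            break
        print(data, end="", flush=True)
print()
```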
2 changes: 2 additions & 0 deletions comps/llms/summarization/predictionguard/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,20 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
llm:
image: opea/llm-docsum-predictionguard:latest
container_name: llm-docsum-predictionguard
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
restart: unless-stopped

networks:
default:
driver: bridge
8 changes: 8 additions & 0 deletions comps/llms/summarization/predictionguard/entrypoint.sh
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

#pip --no-cache-dir install -r requirements-runtime.txt

python llm_predictionguard.py
87 changes: 87 additions & 0 deletions comps/llms/summarization/predictionguard/llm_predictionguard.py
@@ -0,0 +1,87 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
import json
import time

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard

from comps import (
GeneratedDoc,
LLMParamsDoc,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)

client = PredictionGuard()
app = FastAPI()


@register_microservice(
name="opea_service@llm_predictionguard_docsum",
service_type=ServiceType.LLM,
endpoint="/v1/chat/docsum",
host="0.0.0.0",
port=9000,
)
@register_statistics(names=["opea_service@llm_predictionguard_docsum"])
def llm_generate(input: LLMParamsDoc):
start = time.time()

messages = [
{
"role": "system",
"content": "You are a summarization assistant. Your goal is to provide a very concise, summarized responses of the user query.",
Collaborator (inline review comment on the system prompt above):

@lvliang-intel pls review this PR.

From my view, why not merge this with the guardrail component we already have? It looks like only the prompt and the model variant are different.

Author:

@ftian1 Hey Tian,

Hope you're having a good day.

TL;DR: I want to add a document summarization component (Prediction Guard DocSum) to the GenAI Examples repo. This also allows flexibility in how document summaries are generated without altering the existing text-generation component.

Adding this summarization component is desirable because it relies on the Prediction Guard platform.

I want to add a Prediction Guard DocSum permutation to the GenAI Examples repo at some point, and this document summarization component is a necessary piece of that MegaService.

You are correct that this component differs from the predictionguard/text-gen component only in the prompt and the model.

Having two separate components lets us separate concerns: if we later want to change how docsum is handled by Prediction Guard, we can do so without modifying textgen.

},
{"role": "user", "content": input.query},
]

if input.streaming:

async def stream_generator():
chat_response = ""
for res in client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
stream=True,
):
if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
delta_content = res["data"]["choices"][0]["delta"]["content"]
chat_response += delta_content
yield f"data: {delta_content}\n\n"
else:
yield "data: [DONE]\n\n"

statistics_dict["opea_service@llm_predictionguard_docsum"].append_latency(time.time() - start, None)
return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
try:
response = client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
)

print(json.dumps(response, sort_keys=True, indent=4, separators=(",", ": ")))

response_text = response["choices"][0]["message"]["content"]
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

statistics_dict["opea_service@llm_predictionguard_docsum"].append_latency(time.time() - start, None)
return GeneratedDoc(text=response_text, prompt=input.query)


if __name__ == "__main__":
opea_microservices["opea_service@llm_predictionguard_docsum"].start()
12 changes: 12 additions & 0 deletions comps/llms/summarization/predictionguard/requirements.txt
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
4 changes: 2 additions & 2 deletions comps/llms/text-generation/predictionguard/README.md
@@ -33,7 +33,7 @@ curl -X POST http://localhost:9000/v1/chat/completions \
"temperature": 0.7,
"top_p": 0.9,
"top_k": 50,
"stream": false
"streaming": false
}'
```

@@ -49,6 +49,6 @@ curl -N -X POST http://localhost:9000/v1/chat/completions \
"temperature": 0.7,
"top_p": 0.9,
"top_k": 50,
"stream": true
"streaming": true
}'
```
68 changes: 68 additions & 0 deletions tests/llms/test_llms_summarization_predictionguard.sh
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

set -x # Print commands and their arguments as they are executed

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}') # Adjust if a more reliable method is needed
if [ -z "$ip_address" ]; then
ip_address="localhost" # Default to localhost if IP address is empty
fi

function build_docker_images() {
cd $WORKPATH
echo $(pwd)
docker build --no-cache -t opea/llm-pg:comps -f comps/llms/summarization/predictionguard/Dockerfile .
if [ $? -ne 0 ]; then
echo "opea/llm-pg built failed"
exit 1
else
echo "opea/llm-pg built successfully"
fi
}

function start_service() {
llm_service_port=9000
unset http_proxy
docker run -d --name=test-comps-llm-pg-server \
-e http_proxy= -e https_proxy= \
-e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
-p 9000:9000 --ipc=host opea/llm-pg:comps
sleep 5 # Sleep for 5 seconds to allow the service to start
}

function validate_microservice() {
llm_service_port=9000
result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/docsum \
-X POST \
-d '{"model": "Hermes-3-Llama-3.1-8B", "query": "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data.", "streaming": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
-H 'Content-Type: application/json')

if [[ $result == *"text"* ]]; then
echo "Service response is correct."
else
echo "Result wrong. Received was $result"
docker logs test-comps-llm-pg-server
exit 1
fi
}

function stop_docker() {
cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
stop_docker

build_docker_images
start_service

validate_microservice

stop_docker
echo y | docker system prune
}

main