Merge branch 'main' into enable_help
Deegue committed Jan 9, 2024
2 parents a81fbb7 + f26343d commit 9ef5db0
Showing 14 changed files with 36 additions and 32 deletions.
4 changes: 2 additions & 2 deletions docs/web_ui.md
@@ -8,7 +8,7 @@ Please follow [setup.md](setup.md) to setup the environment first.
## Start Web UI

```bash
python -u inference/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
# Get urls from the log
# Running on local URL: http://0.0.0.0:8080
# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
@@ -28,7 +28,7 @@ On the `Deployment` tab, you can choose a model to deploy, configure parameter `


## Chatbot
On the `Inferenc` tab, you can now test the model by asking questions.
On the `Inference` tab, you can now test the model by asking questions.

![webui3](https://github.com/intel/llm-on-ray/assets/9278199/f7b9dc79-92fe-4e75-85fa-2cf7f36bb58d)

12 changes: 6 additions & 6 deletions finetune/finetune.py
@@ -20,10 +20,9 @@
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import common
from finetune_config import FinetuneConfig
from finetune.finetune_config import FinetuneConfig


def get_accelerate_environment_variable(mode: str, config: Dict[str, Any]) -> dict:
@@ -167,9 +166,10 @@ def get_finetune_config():


def main(external_config = None):
config = get_finetune_config()
if external_config is not None:
config.merge(external_config)
if not external_config:
config = get_finetune_config()
else:
config = external_config

config["cwd"] = os.getcwd()
num_training_workers = config["Training"].get("num_training_workers")
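
For context, the reworked `main` treats a supplied `external_config` as a full replacement for the file-based config rather than merging into it. A minimal sketch of how a caller might exercise that path; the package-style import and the `General` values are assumptions, and only `Training.num_training_workers` is taken from the hunk above:

```python
# Hedged sketch: drive finetune.main with an external config instead of the
# file-based one. Keys other than Training.num_training_workers are
# illustrative placeholders, not the project's documented schema.
from finetune.finetune import main  # assumes the repo root is on sys.path

external_config = {
    "General": {"base_model": "EleutherAI/gpt-j-6b"},  # placeholder section
    "Training": {"num_training_workers": 2},           # read by main() per the hunk above
}

# With the new logic, get_finetune_config() is skipped entirely when a
# truthy external config is supplied.
main(external_config=external_config)
```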
4 changes: 2 additions & 2 deletions inference/api_server_openai.py
@@ -34,8 +34,8 @@

import os
from ray import serve
from api_openai_backend.query_client import RouterQueryClient
from api_openai_backend.router_app import Router, router_app
from inference.api_openai_backend.query_client import RouterQueryClient
from inference.api_openai_backend.router_app import Router, router_app


def router_application(deployments):
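
Several of these hunks switch from flat imports (e.g. `from api_openai_backend.query_client import ...`) to package-qualified ones. A small sketch of what that pattern relies on; the relative path below is illustrative and mirrors the `sys.path` handling added elsewhere in this commit:

```python
# Package-qualified imports such as `inference.api_openai_backend.*` resolve
# only if the repository root is importable. One common way (illustrative):
import os
import sys

repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Now the package-style imports used throughout this commit can succeed.
from inference.api_openai_backend.query_client import RouterQueryClient  # noqa: E402
```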
3 changes: 1 addition & 2 deletions inference/deepspeed_predictor.py
@@ -15,8 +15,7 @@
from predictor import Predictor
from utils import get_torch_dtype


from inference_config import InferenceConfig, DEVICE_CPU, DEVICE_XPU, IPEX_PRECISION_BF16
from inference.inference_config import InferenceConfig, DEVICE_CPU, DEVICE_XPU, IPEX_PRECISION_BF16

class DSPipeline:
def __init__(
2 changes: 1 addition & 1 deletion inference/predictor.py
@@ -1,7 +1,7 @@
import re
import torch
from transformers import AutoTokenizer, StoppingCriteriaList
from inference_config import InferenceConfig
from inference.inference_config import InferenceConfig
from utils import max_input_len, StoppingCriteriaSub

class Predictor:
12 changes: 9 additions & 3 deletions inference/predictor_deployment.py
@@ -14,17 +14,19 @@
# limitations under the License.
#

import os
import sys
import asyncio
import functools
from ray import serve
from starlette.requests import Request
from queue import Empty
import torch
from transformers import TextIteratorStreamer
from inference_config import InferenceConfig
from inference.inference_config import InferenceConfig
from typing import Union
from starlette.responses import StreamingResponse
from api_openai_backend.openai_protocol import ModelResponse
from inference.api_openai_backend.openai_protocol import ModelResponse


@serve.deployment
@@ -35,7 +37,11 @@ def __init__(self, infer_conf: InferenceConfig):
chat_processor_name = infer_conf.model_description.chat_processor
prompt = infer_conf.model_description.prompt
if chat_processor_name:
module = __import__("chat_process")
try:
module = __import__("chat_process")
except:
sys.path.append(os.path.dirname(__file__))
module = __import__("chat_process")
chat_processor = getattr(module, chat_processor_name, None)
if chat_processor is None:
raise ValueError(infer_conf.name + " deployment failed. chat_processor(" + chat_processor_name + ") does not exist.")
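
The new try/except gives `__import__("chat_process")` a second chance after appending this file's directory to `sys.path`, so the deployment still finds `chat_process.py` when launched from outside the `inference` directory. A self-contained sketch of that lookup; `load_chat_processor` is a hypothetical helper, not project API, and it narrows the commit's bare `except` to `ImportError` for clarity:

```python
import os
import sys


def load_chat_processor(name: str):
    """Resolve a chat-processor class by name from chat_process.py (illustrative helper)."""
    try:
        module = __import__("chat_process")
    except ImportError:
        # Fall back to the directory containing this file, mirroring the hunk above.
        sys.path.append(os.path.dirname(__file__))
        module = __import__("chat_process")
    processor_cls = getattr(module, name, None)
    if processor_cls is None:
        raise ValueError(f"chat_processor({name}) does not exist.")
    return processor_cls
```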
2 changes: 1 addition & 1 deletion inference/serve.py
@@ -15,13 +15,13 @@
#

import ray
from inference_config import ModelDescription, InferenceConfig, all_models
import sys
from utils import get_deployment_actor_options
from pydantic_yaml import parse_yaml_raw_as
from api_server_simple import serve_run
from api_server_openai import openai_serve_run
from predictor_deployment import PredictorDeployment
from inference.inference_config import ModelDescription, InferenceConfig, all_models

def get_deployed_models(args):
# serve all pre-defined models, or model from MODEL_TO_SERVE env, if no model argument specified
2 changes: 1 addition & 1 deletion inference/transformer_predictor.py
@@ -1,7 +1,7 @@
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers import TextIteratorStreamer
from inference_config import InferenceConfig, IPEX_PRECISION_BF16
from inference.inference_config import InferenceConfig, IPEX_PRECISION_BF16
from predictor import Predictor
from utils import get_torch_dtype

3 changes: 1 addition & 2 deletions inference/utils.py
@@ -16,8 +16,7 @@

from transformers import StoppingCriteria
import torch

from inference_config import InferenceConfig, DEVICE_CPU
from inference.inference_config import InferenceConfig, DEVICE_CPU

def get_deployment_actor_options(infer_conf: InferenceConfig):
_ray_env_key = "env_vars"
4 files renamed without changes.
24 changes: 12 additions & 12 deletions inference/start_ui.py → ui/start_ui.py
@@ -15,17 +15,17 @@
#

import requests
from inference_config import all_models, ModelDescription, Prompt
from inference_config import InferenceConfig as FinetunedConfig
import time
import os
from chat_process import ChatModelGptJ, ChatModelLLama
import torch
from predictor_deployment import PredictorDeployment
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from inference.inference_config import all_models, ModelDescription, Prompt
from inference.inference_config import InferenceConfig as FinetunedConfig
from inference.chat_process import ChatModelGptJ, ChatModelLLama
from inference.predictor_deployment import PredictorDeployment
from ray import serve
import ray
import gradio as gr
import sys
import argparse
from ray.tune import Stopper
from ray.train.base_trainer import TrainingFailedError
@@ -34,7 +34,7 @@
from ray.util import queue
import paramiko
from html_format import cpu_memory_html, ray_status_html, custom_css
from typing import Any, Dict
from typing import Dict
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from pyrecdp.LLM import TextPipeline
@@ -172,7 +172,7 @@ def model_generate(self, prompt, request_url, config):

sample_input = {"text": prompt, "config": config, "stream": True}
proxies = { "http": None, "https": None}
outputs = requests.post(request_url, proxies=proxies, json=[sample_input], stream=True)
outputs = requests.post(request_url, proxies=proxies, json=sample_input, stream=True)
outputs.raise_for_status()
for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
# remove context
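
The payload sent to the serving endpoint changes from a one-element list to a single JSON object. A hedged client-side sketch of the new shape; the URL and generation config values are placeholders, while the keys and streaming loop follow the hunk above:

```python
import requests

request_url = "http://127.0.0.1:8000/custom_model"  # placeholder deployment route
sample_input = {
    "text": "What is Ray?",
    "config": {"max_new_tokens": 128},  # illustrative generation parameters
    "stream": True,
}
proxies = {"http": None, "https": None}

# Single JSON object (json=sample_input), not a one-element list, per the hunk above.
outputs = requests.post(request_url, proxies=proxies, json=sample_input, stream=True)
outputs.raise_for_status()
for chunk in outputs.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="", flush=True)
```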
@@ -479,7 +479,7 @@ def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy:

finetuned_deploy = finetuned.copy(deep=True)
finetuned_deploy.device = 'cpu'
finetuned_deploy.precision = 'bf16'
finetuned_deploy.ipex.precision = 'bf16'
finetuned_deploy.model_description.prompt.stop_words = stop_words
finetuned_deploy.cpus_per_worker = cpus_per_worker_deploy
# transformers 4.35 is needed for neural-chat-7b-v3-1, will be fixed later
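
Here the precision flag moves from the top level of the deployment config onto its nested `ipex` section. A minimal sketch of the resulting field access; constructing `InferenceConfig()` with defaults and the `cpus_per_worker` value are assumptions, while the field names come from the hunk above:

```python
from inference.inference_config import InferenceConfig

finetuned_deploy = InferenceConfig().copy(deep=True)  # assumes default fields are valid
finetuned_deploy.device = "cpu"
finetuned_deploy.ipex.precision = "bf16"  # previously finetuned_deploy.precision = "bf16"
finetuned_deploy.cpus_per_worker = 24     # illustrative value
```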
@@ -581,7 +581,7 @@ def _init_ui(self):
with gr.Blocks(css=custom_css,title=title) as gr_chat:
head_content = """
<div style="color: #fff;text-align: center;">
<div style="position:absolute; left:15px; top:15px; "><img src="/file=inference/ui_images/logo.png" width="50" height="50"/></div>
<div style="position:absolute; left:15px; top:15px; "><img src="/file=ui/images/logo.png" width="50" height="50"/></div>
<p style="color: #fff; font-size: 1.1rem;">Manage LLM Lifecycle</p>
<p style="color: #fff; font-size: 0.9rem;">Fine-Tune LLMs using workflow on Ray, Deploy and Inference</p>
</div>
@@ -755,7 +755,7 @@ def _init_ui(self):
with gr.Row():
with gr.Column(scale=0.1, min_width=45):
with gr.Row():
node_pic = r"./inference/ui_images/Picture2.png"
node_pic = r"./ui/images/Picture2.png"
gr.Image(type="pil", value=node_pic, show_label=False, min_width=45, height=45, width=45, elem_id="notshowimg", container=False)
with gr.Row():
gr.HTML("<h4 style='text-align: center; margin-bottom: 1rem'> Ray Cluster </h4>")
@@ -794,7 +794,7 @@

with gr.Column(scale=0.065, min_width=45):
with gr.Row():
node_pic = r"./inference/ui_images/Picture1.png"
node_pic = r"./ui/images/Picture1.png"
gr.Image(type="pil", value=node_pic, show_label=False, min_width=45, height=45, width=45, elem_id="notshowimg", container=False)
with gr.Row():
if node_ip == self.head_node_ip:
