Commit 489cbe0
updated submodule
rskasturi committed Feb 8, 2024
2 parents f5a68e1 + b20d221 commit 489cbe0
Showing 2 changed files with 31 additions and 3 deletions.
load_models.py: 3 changes (2 additions & 1 deletion)

@@ -9,7 +9,8 @@
     LlamaForCausalLM,
     LlamaTokenizer,
 )
-import intel_extension_for_pytorch as ipex
+# Uncomment the line below if you have an Intel® discrete GPU with XPU support.
+#import intel_extension_for_pytorch as ipex
 from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS, N_GPU_LAYERS, N_BATCH, MODELS_PATH


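A guarded import expresses the same intent as the hand-uncommented line above without requiring users to edit the file. This is a minimal sketch, not part of this commit, and it assumes the standard intel_extension_for_pytorch package name:

# Minimal sketch (not part of this commit): import IPEX only when it is
# installed, so CPU-only environments keep working without source edits.
try:
    import intel_extension_for_pytorch as ipex  # noqa: F401
    HAS_IPEX = True
except ImportError:
    HAS_IPEX = False

Code that needs the extension can then branch on HAS_IPEX instead of relying on a commented-out import.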
run_localGPT.py: 31 changes (29 additions & 2 deletions)

@@ -8,7 +8,9 @@
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler  # for streaming response
 from langchain.callbacks.manager import CallbackManager
 from langchain.llms.base import LLM
-import intel_extension_for_pytorch as ipex
+
+# Uncomment the line below if you have an Intel discrete GPU with XPU support.
+#import intel_extension_for_pytorch as ipex
 
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
@@ -21,6 +23,18 @@
     pipeline,
 )
 
+import warnings
+
+warnings.filterwarnings(
+    "ignore", category=UserWarning, module="intel_extension_for_pytorch"
+)
+warnings.filterwarnings(
+    "ignore", category=UserWarning, module="torchvision.io.image", lineno=13
+)
+warnings.filterwarnings(
+    "ignore", category=UserWarning, module="transformers"
+)
+
 from load_models import (
     load_quantized_model_gguf_ggml,
     load_quantized_model_qptq,
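The filters added above silence UserWarnings globally for the named modules. A narrower alternative, sketched here only for comparison and not part of this commit, is to suppress warnings just around the import that emits them:

import warnings

# Sketch (not part of this commit): scope the suppression to the noisy import
# instead of filtering UserWarning for the whole process.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    import transformers  # noqa: F401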
@@ -66,7 +80,20 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
         else:
             model, tokenizer = load_quantized_model_qptq(model_id, model_basename, device_type, LOGGING)
     else:
-        model, tokenizer = load_full_model(model_id, model_basename, device_type, LOGGING)
+        model, tokenizer = load_full_model(model_id, model_basename, device_type, LOGGING)
+        if device_type == "xpu":
+            class CustomLLM(LLM):
+                def _call(self, prompt, stop=None, run_manager=None) -> str:
+                    input_ids = tokenizer.encode(prompt, return_tensors="pt").to('xpu')
+                    result = model.generate(input_ids=input_ids, max_new_tokens=MAX_NEW_TOKENS)
+                    result = tokenizer.decode(result[0])
+                    return result
+                @property
+                def _llm_type(self) -> str:
+                    return "custom"
+
+            llm = CustomLLM()
+            return llm
 
 class CustomLLM(LLM):
     def _call(self, prompt, stop=None, run_manager=None) -> str:
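The XPU branch added above wraps the freshly loaded model and tokenizer in a small LangChain LLM subclass, so the surrounding pipeline can treat the Intel GPU path like any other backend. The following standalone sketch of the same pattern is not the committed code; the model id is a hypothetical placeholder and it runs on CPU for illustration:

from langchain.llms.base import LLM
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "gpt2"  # hypothetical small model, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

class WrappedLLM(LLM):
    """Same wrapper pattern as the commit's CustomLLM: tokenize, generate, decode."""

    def _call(self, prompt, stop=None, run_manager=None) -> str:
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        output = model.generate(input_ids=input_ids, max_new_tokens=64)
        return tokenizer.decode(output[0])

    @property
    def _llm_type(self) -> str:
        return "custom"

llm = WrappedLLM()
print(llm("Intel discrete GPUs are"))  # legacy LangChain call style

Defining the subclass inside load_model, as the commit does, lets _call close over the model and tokenizer that were just loaded, at the cost of redefining the class on every call to load_model.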
