-
Notifications
You must be signed in to change notification settings - Fork 209
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #268 from yiliu30/itrex_woq
Integrate ITREX to support int8 model on the CPU-only devices
- Loading branch information
Showing
7 changed files
with
124 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# from xturing.datasets.instruction_dataset import InstructionDataset | ||
from xturing.models import BaseModel | ||
|
||
# Initialize the model: quantize it with a weight-only algorithm and | ||
# replace its linear layers with ITREX's qbits_linear kernel | ||
model = BaseModel.create("gpt2_int8") | ||
|
||
# Once the model has been quantized, you can run inference directly | ||
output = model.generate(texts=["Why LLM models are becoming so important?"]) | ||
print(output) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# from xturing.datasets.instruction_dataset import InstructionDataset | ||
from xturing.models import BaseModel | ||
|
||
# Initialize the model: quantize it with a weight-only algorithm and | ||
# replace its linear layers with ITREX's qbits_linear kernel | ||
model = BaseModel.create("llama2_int8") | ||
|
||
# Once the model has been quantized, you can run inference directly | ||
output = model.generate(texts=["Why LLM models are becoming so important?"]) | ||
print(output) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,23 @@ | ||
import torch | ||
|
||
from xturing.utils.interactive import is_interactive_execution | ||
from xturing.utils.logging import configure_logger | ||
from xturing.utils.utils import assert_install_itrex | ||
|
||
logger = configure_logger(__name__) | ||
|
||
# Use CUDA when it is available; otherwise fall back to the CPU and emit a warning | ||
DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") | ||
DEFAULT_DTYPE = torch.float16 if DEFAULT_DEVICE.type == "cuda" else torch.float32 | ||
IS_INTERACTIVE = is_interactive_execution() | ||
|
||
if DEFAULT_DEVICE.type == "cpu": | ||
print("WARNING: CUDA is not available, using CPU instead, can be very slow") | ||
logger.warning("WARNING: CUDA is not available, using CPU instead, can be very slow") | ||
|
||
|
||
def assert_not_cpu_int8():
    """Fail when the default device is the CPU.

    Reads the module-level ``DEFAULT_DEVICE`` and rejects the ``"cpu"``
    device type.

    Raises:
        AssertionError: If ``DEFAULT_DEVICE.type`` is ``"cpu"``.
    """
    # Raise explicitly instead of using an ``assert`` statement: ``assert``
    # is removed when Python runs with -O, which would silently disable
    # this check. The exception type and message are unchanged.
    if DEFAULT_DEVICE.type == "cpu":
        raise AssertionError("Int8 models are not supported on CPU")
|
||
def assert_cpu_int8_on_itrex():
    """Run the ITREX installation check when the default device is the CPU.

    Does nothing for any other device type. On CPU it delegates to
    ``assert_install_itrex()`` (presumably verifies that ITREX is
    installed -- confirm against ``xturing.utils.utils``).
    """
    # Only the CPU path needs the ITREX check.
    if DEFAULT_DEVICE.type != "cpu":
        return
    assert_install_itrex()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters