From d39298c1289a7dcc4d95b08bcd7ad90e9fbf12e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kuligowski?=
Date: Tue, 17 Sep 2024 15:46:51 +0200
Subject: [PATCH] Update documentation on support of fp8 (#288)

Update documentation on support of fp8

---
 README_GAUDI.md                                    | 3 ++-
 docs/source/getting_started/gaudi-installation.rst | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/README_GAUDI.md b/README_GAUDI.md
index 5109f7ddf9927..2ae9d5f2cc6e4 100644
--- a/README_GAUDI.md
+++ b/README_GAUDI.md
@@ -81,6 +81,7 @@ Supported Features
 - Inference with [HPU
   Graphs](https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_HPU_Graphs.html)
   for accelerating low-batch latency and throughput
+- INC quantization
 
 Unsupported Features
 ====================
@@ -88,7 +89,7 @@ Unsupported Features
 - Beam search
 - LoRA adapters
 - Attention with Linear Biases (ALiBi)
-- Quantization (AWQ, FP8 E5M2, FP8 E4M3)
+- AWQ quantization
 - Prefill chunking (mixed-batch inferencing)
 
 Supported Configurations
diff --git a/docs/source/getting_started/gaudi-installation.rst b/docs/source/getting_started/gaudi-installation.rst
index 27bc0186675f1..2d810380af59b 100644
--- a/docs/source/getting_started/gaudi-installation.rst
+++ b/docs/source/getting_started/gaudi-installation.rst
@@ -76,6 +76,7 @@ Supported Features
 - Tensor parallelism support for multi-card inference
 - Inference with `HPU Graphs
   <https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_HPU_Graphs.html>`__
   for accelerating low-batch latency and throughput
+- INC quantization
 
 Unsupported Features
 ====================
@@ -83,7 +84,7 @@ Unsupported Features
 - Beam search
 - LoRA adapters
 - Attention with Linear Biases (ALiBi)
-- Quantization (AWQ, FP8 E5M2, FP8 E4M3)
+- AWQ quantization
 - Prefill chunking (mixed-batch inferencing)
 
 Supported Configurations
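
The patch above moves FP8 out of the unsupported list and documents INC (Intel Neural Compressor) quantization as supported. As a usage illustration only (not part of the patch), here is a minimal sketch of how INC-based FP8 quantization is typically enabled on Gaudi. The `--quantization inc` and `--kv-cache-dtype fp8_inc` flags and the `QUANT_CONFIG` environment variable follow Habana's vLLM documentation and may differ between releases; the model name and config path are placeholders.

```shell
# Sketch, assuming the INC flags from Habana's vLLM docs.
# QUANT_CONFIG points at an Intel Neural Compressor JSON config
# (measurement/quantization settings); the path is a placeholder.
export QUANT_CONFIG=/path/to/quant_config.json

# Serve a model with INC FP8 quantization on HPU; the model name
# is illustrative, not prescribed by this patch.
python -m vllm.entrypoints.openai.api_server \
    --model meta-llama/Llama-2-7b-hf \
    --quantization inc \
    --kv-cache-dtype fp8_inc
```

Note that AWQ remains listed as unsupported on Gaudi after this change, so `--quantization awq` should not be expected to work on this backend.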