Commit 3b5c2af

Changing model used in main example notebook

noamgat committed Nov 27, 2023
1 parent fccfee7 commit 3b5c2af
Showing 2 changed files with 6 additions and 30 deletions.
7 changes: 3 additions & 4 deletions README.md
@@ -25,9 +25,8 @@ This project solves the issues by filtering the tokens that the language model i
 ## Basic Tutorial
 ```python
 # Requirements if running from Google Colab with a T4 GPU.
-!pip install lm-format-enforcer transformers torch huggingface_hub accelerate bitsandbytes cpm_kernels
-# Need to login with an account with access to llama.
-!huggingface-cli login
+!pip install transformers torch lm-format-enforcer huggingface_hub optimum langchain langchain-experimental
+!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 
 from pydantic import BaseModel
 from lmformatenforcer import JsonSchemaParser
@@ -41,7 +40,7 @@ class AnswerFormat(BaseModel):
     num_seasons_in_nba: int
 
 # Create a transformers pipeline
-hf_pipeline = pipeline('text-generation', model='meta-llama/Llama-2-7b-hf', model_kwargs={'load_in_8bit': True})
+hf_pipeline = pipeline('text-generation', model='TheBloke/Llama-2-7b-Chat-GPTQ', device_map='auto')
 prompt = f'Here is information about Michael Jordan in the following json schema: {AnswerFormat.schema_json()} :\n'
 
 # Create a character level parser and build a transformers prefix function from it
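The commit swaps the gated fp16 `meta-llama/Llama-2-7b-hf` checkpoint (loaded 8-bit via bitsandbytes) for the ungated, pre-quantized `TheBloke/Llama-2-7b-Chat-GPTQ`, which keeps the tutorial within a T4's memory while removing the `huggingface-cli login` step. For orientation, here is a minimal sketch of the post-commit tutorial flow; `JsonSchemaParser` and `build_transformers_prefix_allowed_tokens_fn` follow lm-format-enforcer's transformers integration, and the `AnswerFormat` fields other than `num_seasons_in_nba` are illustrative assumptions, since the diff only shows that one field.

```python
# Sketch of the post-commit README tutorial. Fields other than
# num_seasons_in_nba are assumptions; only that field appears in the diff.
from pydantic import BaseModel
from transformers import pipeline

from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn

class AnswerFormat(BaseModel):
    first_name: str          # assumed field
    last_name: str           # assumed field
    num_seasons_in_nba: int  # shown in the diff

# Ungated GPTQ checkpoint: no Hugging Face login required.
hf_pipeline = pipeline('text-generation', model='TheBloke/Llama-2-7b-Chat-GPTQ', device_map='auto')
prompt = f'Here is information about Michael Jordan in the following json schema: {AnswerFormat.schema_json()} :\n'

# Build a character-level parser from the pydantic schema, then wrap it as a
# prefix_allowed_tokens_fn that masks tokens which would break the JSON schema.
parser = JsonSchemaParser(AnswerFormat.schema())
prefix_function = build_transformers_prefix_allowed_tokens_fn(hf_pipeline.tokenizer, parser)

# At each decoding step, only tokens that keep the output schema-valid are allowed.
output = hf_pipeline(prompt, prefix_allowed_tokens_fn=prefix_function)
print(output[0]['generated_text'][len(prompt):])
```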
29 changes: 3 additions & 26 deletions samples/colab_llama2_enforcer.ipynb
@@ -12,27 +12,14 @@
     "Menu Bar -> Runtime -> Change runtime type -> T4 GPU (at the time of writing this notebook). [Guide here](https://www.codesansar.com/deep-learning/using-free-gpu-tpu-google-colab.htm)."
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Gathering huggingface credentials (user action required)\n",
-    "\n",
-    "We begin by installing the dependencies. This demo uses llama2, so you will have to create a free huggingface account, request access to the llama2 model, create an access token, and insert it when executing the next cell will request it.\n",
-    "\n",
-    "Links:\n",
-    "\n",
-    "- [Request access to llama model](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). See the \"Access Llama 2 on Hugging Face\" section.\n",
-    "- [Create huggingface access token](https://huggingface.co/settings/tokens)\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install transformers torch lm-format-enforcer huggingface_hub accelerate bitsandbytes cpm_kernels langchain langchain-experimental \n",
+    "!pip install transformers torch lm-format-enforcer huggingface_hub optimum langchain langchain-experimental\n",
+    "!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ \n",
     "\n",
     "# When running from source / developing the library, use this instead\n",
     "# %load_ext autoreload\n",
@@ -43,16 +30,6 @@
     "# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from huggingface_hub import notebook_login\n",
-    "notebook_login()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -79,7 +56,7 @@
    "import torch\n",
    "from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
-   "model_id = 'meta-llama/Llama-2-7b-chat-hf'\n",
+   "model_id = 'TheBloke/Llama-2-7b-Chat-GPTQ'\n",
    "device = 'cuda'\n",
    "\n",
    "if torch.cuda.is_available():\n",
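The final hunk ends inside the model-loading cell, so the rest of that cell is not shown here. As a sketch of how the new checkpoint loads under the updated dependencies (the `from_pretrained` arguments below are assumptions, not the notebook's exact code):

```python
# Sketch only: the notebook's loading cell continues past the hunk above,
# so the from_pretrained call here is an assumption.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = 'TheBloke/Llama-2-7b-Chat-GPTQ'

if torch.cuda.is_available():
    # With optimum + auto-gptq installed, transformers reads the GPTQ
    # quantization config embedded in the checkpoint and loads the
    # pre-quantized weights directly; no bitsandbytes 8-bit flags and
    # no gated-repo login are needed.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_id)
else:
    raise RuntimeError('This GPTQ model requires a CUDA GPU')
```

Because the GPTQ repository is ungated, this also explains why the commit deletes both the "Gathering huggingface credentials" markdown cell and the `notebook_login()` cell.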
