diff --git a/docs/reference/index.md b/docs/reference/index.md
index a5357fd86..0cd42ba88 100644
--- a/docs/reference/index.md
+++ b/docs/reference/index.md
@@ -10,6 +10,6 @@ By default, language models stop generating tokens after an EOS token was generated.
 ```python
 import outlines.models as models
 
-complete = models.openai("gpt-3.5-turbo")
+complete = models.openai("gpt-4o-mini")
 expert = complete("Name an expert in quantum gravity.", stop_at=["\n", "."])
 ```
diff --git a/docs/reference/models/models.md b/docs/reference/models/models.md
index 34b5be4cf..4d2dda8c9 100644
--- a/docs/reference/models/models.md
+++ b/docs/reference/models/models.md
@@ -42,7 +42,11 @@ model = outlines.models.openai(
 | Stream | ✅ | ❌ | ❌ | ✅ | ? | ✅ | ❌ |
 | **`outlines.generate`** | | | | | | | |
 | Text | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| Structured* | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
+| **Structured** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| JSON Schema | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Choice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Regex | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
+| Grammar | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 
 ## Caveats
 
diff --git a/docs/reference/models/openai.md b/docs/reference/models/openai.md
index 7f610c179..5c737c916 100644
--- a/docs/reference/models/openai.md
+++ b/docs/reference/models/openai.md
@@ -2,22 +2,21 @@
 
 !!! Installation
 
-    You need to install the `openai` and `tiktoken` libraries to be able to use the OpenAI API in Outlines.
+    You need to install the `openai` library to be able to use the OpenAI API in Outlines.
 
 ## OpenAI models
 
-Outlines supports models available via the OpenAI Chat API, e.g. ChatGPT and GPT-4. You can initialize the model by passing the model name to `outlines.models.openai`:
+Outlines supports models available via the OpenAI Chat API, e.g. GPT-4o, ChatGPT and GPT-4. You can initialize the model by passing the model name to `outlines.models.openai`:
 
 ```python
 from outlines import models
 
-model = models.openai("gpt-3.5-turbo")
-model = models.openai("gpt-4-turbo")
+model = models.openai("gpt-4o-mini")
 model = models.openai("gpt-4o")
 ```
 
-Check the [OpenAI documentation](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4) for an up-to-date list of available models. You can pass any parameter you would pass to `openai.AsyncOpenAI` as keyword arguments:
+Check the [OpenAI documentation](https://platform.openai.com/docs/models/gpt-4o) for an up-to-date list of available models. You can pass any parameter you would pass to `openai.AsyncOpenAI` as keyword arguments:
 
 ```python
 import os
@@ -25,7 +24,7 @@ from outlines import models
 
 model = models.openai(
-    "gpt-3.5-turbo",
+    "gpt-4o-mini",
     api_key=os.environ["OPENAI_API_KEY"]
 )
 ```
@@ -56,8 +55,8 @@ from outlines import models
 
 model = models.azure_openai(
     "azure-deployment-name",
-    "gpt-3.5-turbo",
-    api_version="2023-07-01-preview",
+    "gpt-4o-mini",
+    api_version="2024-06-01",
     azure_endpoint="https://example-endpoint.openai.azure.com",
 )
 ```
@@ -111,6 +110,37 @@ model = models.openai(client, config)
 
 You need to pass the async client to be able to do batch inference.
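+
+As a sketch of what batch inference with the async client might look like
+(the `OpenAIConfig(model=...)` field and the list-of-prompts call are
+assumptions about the API, not documented guarantees):
+
+```python
+from openai import AsyncOpenAI
+from outlines import models, generate
+from outlines.models.openai import OpenAIConfig
+
+# Hypothetical sketch: an explicit async client lets requests run concurrently.
+client = AsyncOpenAI()
+config = OpenAIConfig(model="gpt-4o-mini")  # assumed field name
+model = models.openai(client, config)
+
+generator = generate.text(model)
+# Assumed behavior: a list of prompts is dispatched as a batch.
+answers = generator(["What is 1+1?", "What is 2+2?"])
+```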
+## Structured Generation Support
+
+Outlines provides support for [OpenAI Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs/json-mode) via `outlines.generate.json` and `outlines.generate.choice`:
+
+```python
+from pydantic import BaseModel, ConfigDict
+import outlines.models as models
+from outlines import generate
+
+model = models.openai("gpt-4o-mini")
+
+class Person(BaseModel):
+    model_config = ConfigDict(extra='forbid')  # required for openai
+    first_name: str
+    last_name: str
+    age: int
+
+generator = generate.json(model, Person)
+generator("Current Indian prime minister on January 1st 2023")
+# Person(first_name='Narendra', last_name='Modi', age=72)
+
+generator = generate.choice(model, ["Chicken", "Egg"])
+print(generator("Which came first?"))
+# Chicken
+```
+
+!!! Warning
+
+    Structured generation is only supported for OpenAI-compatible endpoints that conform to OpenAI's standard. Additionally, `generate.regex` and `generate.cfg` are not supported.
+
 ## Advanced configuration
 
 For more advanced configuration options, such as proxy support, please consult the [OpenAI SDK's documentation](https://github.com/openai/openai-python):
 
@@ -146,7 +176,7 @@ config = OpenAIConfig(
     top_p=.95,
     seed=0,
 )
-model = models.openai("gpt-3.5-turbo", config)
+model = models.openai("gpt-4o-mini", config)
 ```
 
 ## Monitoring API use
 
@@ -158,7 +188,7 @@ from openai import AsyncOpenAI
 
 import outlines.models
 
-model = models.openai("gpt-4")
+model = outlines.models.openai("gpt-4o")
 
 print(model.prompt_tokens)  # 0
diff --git a/docs/reference/text.md b/docs/reference/text.md
index 3d0d130f1..f36a67da0 100644
--- a/docs/reference/text.md
+++ b/docs/reference/text.md
@@ -9,7 +9,7 @@ Outlines provides a unified interface to generate text with many language models
 ```python
 from outlines import models, generate
 
-model = models.openai("gpt-4")
+model = models.openai("gpt-4o-mini")
 generator = generate.text(model)
 
 answer = generator("What is 2+2?")
diff --git a/examples/babyagi.py b/examples/babyagi.py
index 6af2ead88..0a7a0b13b 100644
--- a/examples/babyagi.py
+++ b/examples/babyagi.py
@@ -10,7 +10,7 @@ import outlines
 import outlines.models as models
 
-model = models.openai("gpt-3.5-turbo")
+model = models.openai("gpt-4o-mini")
 
 complete = outlines.generate.text(model)
diff --git a/examples/math_generate_code.py b/examples/math_generate_code.py
index 5bf16e86a..7eb1651a7 100644
--- a/examples/math_generate_code.py
+++ b/examples/math_generate_code.py
@@ -35,7 +35,7 @@ def execute_code(code):
 
 prompt = answer_with_code_prompt(question, examples)
-model = models.openai("gpt-3.5-turbo")
+model = models.openai("gpt-4o-mini")
 answer = outlines.generate.text(model)(prompt)
 result = execute_code(answer)
 print(f"It takes Carla {result:.0f} minutes to download the file.")
diff --git a/examples/meta_prompting.py b/examples/meta_prompting.py
index 8f9ba5cee..cba18b5fe 100644
--- a/examples/meta_prompting.py
+++ b/examples/meta_prompting.py
@@ -140,7 +140,7 @@ def run_example(model_fn, question, model_name):
     parser.add_argument(
         "--model",
         type=str,
-        default="gpt-3.5-turbo-1106",
+        default="gpt-4o-mini",
         help="The Large Language Model to use to run the examples.",
     )
     args = parser.parse_args()
diff --git a/examples/pick_odd_one_out.py b/examples/pick_odd_one_out.py
index 6c4d45927..6cd9f1daf 100644
--- a/examples/pick_odd_one_out.py
+++ b/examples/pick_odd_one_out.py
@@ -31,7 +31,7 @@ def build_ooo_prompt(options):
 
 options = ["sea", "mountains", "plains", "sock"]
 
-model = models.openai("gpt-3.5-turbo")
+model = models.openai("gpt-4o-mini")
models.openai("gpt-4o-mini") gen_text = outlines.generate.text(model) gen_choice = outlines.generate.choice(model, options) diff --git a/examples/react.py b/examples/react.py index ccd74dbbc..34b3c6eb2 100644 --- a/examples/react.py +++ b/examples/react.py @@ -13,6 +13,7 @@ import requests # type: ignore import outlines +import outlines.generate as generate import outlines.models as models @@ -45,25 +46,25 @@ def search_wikipedia(query: str): prompt = build_reAct_prompt("Where is Apple Computers headquarted? ") -model = models.openai("gpt-3.5-turbo") -complete = outlines.generate.text(model) +model = models.openai("gpt-4o-mini") + +mode_generator = generate.choice(model, choices=["Tho", "Act"]) +action_generator = generate.choice(model, choices=["Search", "Finish"]) +text_generator = generate.text(model) for i in range(1, 10): - mode = complete.generate_choice(prompt, choices=["Tho", "Act"], max_tokens=128) + mode = mode_generator(prompt, max_tokens=128) prompt = add_mode(i, mode, "", prompt) if mode == "Tho": - thought = complete(prompt, stop_at="\n", max_tokens=128) + thought = text_generator(prompt, stop_at="\n", max_tokens=128) prompt += f"{thought}" elif mode == "Act": - action = complete.generate_choice( - prompt, choices=["Search", "Finish"], max_tokens=128 - ) + action = action_generator(prompt, max_tokens=128) prompt += f"{action} '" - subject = complete( - prompt, stop_at=["'"], max_tokens=128 - ) # Apple Computers headquartered + subject = text_generator(prompt, stop_at=["'"], max_tokens=128) + # Apple Computers headquartered subject = " ".join(subject.split()[:2]) prompt += f"{subject}'" diff --git a/examples/self_consistency.py b/examples/self_consistency.py index e7468d5f9..f1bbe2a18 100644 --- a/examples/self_consistency.py +++ b/examples/self_consistency.py @@ -55,7 +55,7 @@ def few_shots(question, examples): """ -model = models.openai("gpt-3.5-turbo") +model = models.openai("gpt-4o-mini") generator = outlines.generate.text(model) prompt = few_shots(question, examples) answers = generator(prompt, samples=10)