Skip to content

Commit

Permalink
Separate indexing from generation. /text -> /generate and flatten
Browse files Browse the repository at this point in the history
  • Loading branch information
rlouf committed Nov 16, 2023
1 parent ba2645c commit 8d9152a
Show file tree
Hide file tree
Showing 32 changed files with 49 additions and 44 deletions.
2 changes: 1 addition & 1 deletion docs/api/continuation.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.generate.continuation
::: outlines.generate.continuation
2 changes: 1 addition & 1 deletion docs/api/fsm.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.fsm
::: outlines.index.fsm
2 changes: 1 addition & 1 deletion docs/api/json_schema.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.json_schema
::: outlines.index.json_schema
2 changes: 1 addition & 1 deletion docs/api/parsing.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.parsing
::: outlines.index.parsing
2 changes: 1 addition & 1 deletion docs/api/regex.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.generate.regex
::: outlines.generate.regex
2 changes: 1 addition & 1 deletion docs/api/samplers.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: outlines.text.generate.samplers
::: outlines.generate.samplers
5 changes: 2 additions & 3 deletions examples/dating_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from pydantic import BaseModel, conlist

import outlines
import outlines.models as models
import outlines.text as text
from outlines import models


class QuestionChoice(str, Enum):
Expand Down Expand Up @@ -122,7 +121,7 @@ def dating_profile_prompt(description: str, examples: list[Example]):
new_description = "I'm a laid-back lawyer who spends a lot of his free-time gaming. I work in a corporate office, but ended up here after the start-up I cofounded got acquired, so still play ping pong with my cool coworkers every day. I have a bar at home where I make cocktails, which is great for entertaining friends. I secretly like to wear suits and get a new one tailored every few months. I also like weddings because I get to wear those suits, and it's a good excuse for a date. I watch the latest series because I'm paying, with my hard-earned money, for every streaming service."

prompt = dating_profile_prompt(description=new_description, examples=samples)
profile = text.generate.json(model, DatingProfile)(prompt) # type: ignore
profile = outlines.generate.json(model, DatingProfile)(prompt) # type: ignore
print(profile)

# Sample generated profiles
Expand Down
2 changes: 1 addition & 1 deletion examples/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
set_seed,
)

from outlines.text.parsing import PartialLark, PartialPythonIndenter
from outlines.index.parsing import PartialLark, PartialPythonIndenter

revision = None
checkpoint = "Salesforce/codegen-350M-mono"
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import torch

from outlines.text.generate.sequence import Sequence
from outlines.generate.sequence import Sequence

if TYPE_CHECKING:
from outlines.text.generate.samplers import Sampler
from outlines.generate.samplers import Sampler


class Continuation(Sequence):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch

if TYPE_CHECKING:
from outlines.text.generate.samplers import Sampler
from outlines.generate.samplers import Sampler


def process(generator: Generator, index, token_ids: torch.Tensor):
Expand Down
13 changes: 8 additions & 5 deletions outlines/text/generate/regex.py → outlines/generate/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
import torch
from pydantic import BaseModel

from outlines.text.fsm import create_fsm_index_tokenizer, make_deterministic_fsm
from outlines.text.generate.continuation import Continuation
from outlines.text.json_schema import build_regex_from_object, get_schema_from_signature
from outlines.text.types import python_types_to_regex
from outlines.generate.continuation import Continuation
from outlines.index.fsm import create_fsm_index_tokenizer, make_deterministic_fsm
from outlines.index.json_schema import (
build_regex_from_object,
get_schema_from_signature,
)
from outlines.index.types import python_types_to_regex

if TYPE_CHECKING:
from outlines.text.generate.samplers import Sampler
from outlines.generate.samplers import Sampler


class Regex(Continuation):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from outlines.models import OpenAIAPI

if TYPE_CHECKING:
from outlines.generate.samplers import Sampler
from outlines.models.transformers import KVCacheType, Transformers
from outlines.text.generate.samplers import Sampler


class Sequence:
Expand Down Expand Up @@ -45,7 +45,7 @@ def __init__(
model.tokenizer.pad_token_id, device=model.device
)
if sampler is None:
from outlines.text.generate.samplers import multinomial
from outlines.generate.samplers import multinomial

self.sampler = multinomial
else:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion outlines/text/parsing.py → outlines/index/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from lark.parsers.lalr_interactive_parser import InteractiveParser
from lark.parsers.lalr_parser import LALR_Parser, ParseConf, ParserState, _Parser

from outlines.text.fsm import (
from outlines.index.fsm import (
fsm_union,
get_sub_fsms_from_seq,
make_deterministic_fsm,
Expand Down
File renamed without changes.
1 change: 0 additions & 1 deletion outlines/text/__init__.py

This file was deleted.

Empty file added tests/generate/__init__.py
Empty file.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch

from outlines.text.generate.continuation import Continuation, continuation
from outlines.generate.continuation import Continuation, continuation


class Tokenizer:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
import torch

from outlines.text.generator import bias_logits, token_generator
from outlines.generate.generator import bias_logits, token_generator


def test_generator_error():
Expand Down Expand Up @@ -133,3 +133,11 @@ def is_final(state):
return True
else:
return False


def test_update_token_ids():
    """Placeholder test for token-id updating; no assertions are written yet."""
    # Raises unconditionally, so running this test reports an error until a
    # real body replaces the stub.
    raise NotImplementedError


def expand_attention_masks():
    """Placeholder for attention-mask expansion; nothing is implemented yet."""
    # NOTE(review): unlike `test_update_token_ids`, this name has no `test_`
    # prefix, so pytest's default discovery presumably never collects it --
    # confirm whether it was meant to be `test_expand_attention_masks`.
    raise NotImplementedError
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import torch
from pydantic import BaseModel, constr

import outlines.generate as generate
import outlines.models as models
import outlines.text.generate as generate
from outlines.index.fsm import reduced_vocabulary
from outlines.models.transformers import TransformersTokenizer
from outlines.text.fsm import reduced_vocabulary


def test_transformers_integration_continuation():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pytest
import torch

import outlines.text.generate as generate
from outlines.text.fsm import create_fsm_index_tokenizer, make_deterministic_fsm
from outlines.text.generate.regex import Regex
import outlines.generate as generate
from outlines.generate.regex import Regex
from outlines.index.fsm import create_fsm_index_tokenizer, make_deterministic_fsm


class Tokenizer:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@

import torch

from outlines.text.generate.samplers import (
greedy,
multinomial,
vectorized_random_choice,
)
from outlines.generate.samplers import greedy, multinomial, vectorized_random_choice


def test_greedy():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import pytest
import torch

from outlines.generate.sequence import Sequence
from outlines.models import OpenAIAPI
from outlines.models.tokenizer import Tokenizer
from outlines.text.generate.sequence import Sequence


def test_openai_error():
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions tests/text/test_fsm.py → tests/index/test_fsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import numba
import pytest

from outlines.models.transformers import TransformersTokenizer
from outlines.text.fsm import (
from outlines.index.fsm import (
_walk_fsm,
create_fsm_index,
create_fsm_index_end_to_end,
Expand All @@ -14,6 +13,7 @@
make_deterministic_fsm,
walk_fsm,
)
from outlines.models.transformers import TransformersTokenizer


def walk_fsm_numba(
Expand Down Expand Up @@ -429,7 +429,7 @@ def test_json_index_performance():
from pydantic import BaseModel, constr

import outlines.models as models
from outlines.text.generate.regex import Regex, build_regex_from_object
from outlines.generate.regex import Regex, build_regex_from_object

class Weapon(str, Enum):
sword = "sword"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from pydantic import BaseModel, constr

from outlines.text.json_schema import (
from outlines.index.json_schema import (
BOOLEAN,
INTEGER,
NULL,
Expand Down
8 changes: 4 additions & 4 deletions tests/text/test_parsing.py → tests/index/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
from lark.indenter import DedentError
from lark.lexer import UnexpectedCharacters, UnexpectedToken

from outlines.text.parsing import PartialLark, PartialPythonIndenter
from outlines.index.parsing import PartialLark, PartialPythonIndenter


def test_partial_parsing():
lp = PartialLark.open_from_package(
"tests",
"partial_python.lark",
["text"],
["index"],
parser="lalr",
postlex=PartialPythonIndenter(),
start="file_input",
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_partial_parsing():
lp = PartialLark.open_from_package(
"tests",
"partial_python.lark",
["text"],
["index"],
parser="lalr",
postlex=PartialPythonIndenter(),
start="file_input",
Expand Down Expand Up @@ -160,7 +160,7 @@ def test_sequential_parse_example():
lp = PartialLark.open_from_package(
"tests",
"partial_python.lark",
["text"],
["index"],
parser="lalr",
postlex=PartialPythonIndenter(),
start="file_input",
Expand Down
2 changes: 1 addition & 1 deletion tests/text/test_types.py → tests/index/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from outlines.text.types import (
from outlines.index.types import (
BOOLEAN,
DATE,
DATETIME,
Expand Down

0 comments on commit 8d9152a

Please sign in to comment.