Skip to content

Commit

Permalink
Add a functional test to exercise our split out APIs
Browse files Browse the repository at this point in the history
This adds a functional test that covers the entire end-to-end flow of
`ilab data generate`, driven purely by the individual pieces of the
split-out SDG APIs. No actual LLM inference happens; instead, all
responses are mocked to keep the test fast and independent of
hardware, since we don't need real LLM responses to verify our APIs.

Signed-off-by: Ben Browning <[email protected]>
  • Loading branch information
bbrowning committed Jan 7, 2025
1 parent 8336e42 commit f84902f
Show file tree
Hide file tree
Showing 6 changed files with 392 additions and 0 deletions.
Empty file added tests/functional/__init__.py
Empty file.
101 changes: 101 additions & 0 deletions tests/functional/test_granular_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# SPDX-License-Identifier: Apache-2.0

# Standard
from datetime import datetime
from unittest.mock import MagicMock
import glob
import pathlib

# Third Party
import git

# First Party
from instructlab.sdg import BlockRegistry
from instructlab.sdg.generate_data import (
generate_taxonomy,
mix_datasets,
postprocess_taxonomy,
preprocess_taxonomy,
)

# Local
from ..mockllmblock import MockLLMBlock


def _clone_instructlab_taxonomy(taxonomy_dir):
    """Clone the instructlab taxonomy into ``taxonomy_dir`` at a pinned commit.

    The repository is cloned without a working-tree checkout, then the
    pinned commit is checked out explicitly so the test always sees the
    same taxonomy content.

    Args:
        taxonomy_dir: Destination directory for the clone.
    """
    taxonomy_repo_url = "https://github.com/instructlab/taxonomy"
    taxonomy_commit = "dfa3afaf26f40f923cf758389719619ec9b1ddb1"
    # Use the Repo as a context manager so close() runs on exit and the
    # handles it holds are released even if the checkout fails.
    with git.Repo.clone_from(
        taxonomy_repo_url, taxonomy_dir, no_checkout=True
    ) as repo:
        repo.git.checkout(taxonomy_commit)


def test_granular_api_end_to_end(testdata_path: pathlib.Path, tmp_path: pathlib.Path):
    """Run preprocess -> generate -> postprocess -> mix using mock pipelines.

    Each stage is called through its own API function and the test asserts
    that the expected output files exist after every stage.
    """
    # Register our mock block so we can reference it in pipelines
    BlockRegistry.register("MockLLMBlock")(MockLLMBlock)

    # Clone a taxonomy and touch 1 file in it
    taxonomy_dir = tmp_path / "taxonomy"
    _clone_instructlab_taxonomy(taxonomy_dir)
    changed_qna_yaml = (
        taxonomy_dir
        / "knowledge"
        / "science"
        / "animals"
        / "birds"
        / "black_capped_chickadee"
        / "qna.yaml"
    )
    # NOTE(review): appending an empty string leaves the file content
    # unchanged -- confirm this is intended to count as an "edit".
    with open(changed_qna_yaml, "a", encoding="utf-8") as qna_file:
        qna_file.write("")

    pipeline_dir = testdata_path / "mock_pipelines"
    now_to_the_second = datetime.now().replace(microsecond=0)
    date_suffix = now_to_the_second.isoformat().replace(":", "_")
    chickadee_jsonl = "knowledge_science_animals_birds_black_capped_chickadee.jsonl"

    # Stage 1: preprocess the taxonomy into documents and samples
    preprocessed_dir = tmp_path / "preprocessed"
    preprocess_taxonomy(
        taxonomy_dir=taxonomy_dir,
        output_dir=preprocessed_dir,
    )
    chickadee_doc_pattern = str(
        preprocessed_dir / "documents" / "knowledge_science_*" / "chickadee.md"
    )
    chickadee_docs = glob.glob(chickadee_doc_pattern)
    assert chickadee_docs
    chickadee_samples_path = preprocessed_dir / chickadee_jsonl
    assert chickadee_samples_path.is_file()

    # Stage 2: generate, with a MagicMock standing in for the client
    client = MagicMock()
    client.server_supports_batched = False
    generated_dir = tmp_path / "generated"
    generate_taxonomy(
        client=client,
        input_dir=preprocessed_dir,
        output_dir=generated_dir,
        pipeline=pipeline_dir,
    )
    generated_chickadee_samples_path = generated_dir / chickadee_jsonl
    assert generated_chickadee_samples_path.is_file()

    # Stage 3: postprocess, which should emit both recipe files
    postprocessed_dir = tmp_path / "postprocessed"
    postprocess_taxonomy(
        input_dir=generated_dir,
        output_dir=postprocessed_dir,
        date_suffix=date_suffix,
        pipeline=pipeline_dir,
    )
    knowledge_recipe_file = postprocessed_dir / f"knowledge_recipe_{date_suffix}.yaml"
    assert knowledge_recipe_file.is_file()
    skills_recipe_file = postprocessed_dir / f"skills_recipe_{date_suffix}.yaml"
    assert skills_recipe_file.is_file()

    # Stage 4: mix the skills recipe into a single training file
    mixed_skills_output_file = (
        f"{postprocessed_dir}/skills_train_msgs_{date_suffix}.jsonl"
    )
    mix_datasets(
        recipe_file=f"{postprocessed_dir}/skills_recipe_{date_suffix}.yaml",
        output_file=mixed_skills_output_file,
    )
    assert pathlib.Path(mixed_skills_output_file).is_file()
55 changes: 55 additions & 0 deletions tests/mockllmblock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# SPDX-License-Identifier: Apache-2.0

# Standard
import random
import string

# Third Party
from datasets import Dataset

# First Party
from instructlab.sdg import LLMBlock


def _random_string(size):
    """Return ``size`` randomly chosen lowercase ASCII letters as one string."""
    letters = random.choices(string.ascii_lowercase, k=size)
    return "".join(letters)


def _add_mocked_cols(sample, block_name):
    """Fill ``sample`` with canned output columns for the named block.

    Args:
        sample: Mapping for one row; it is updated in place.
        block_name: Name of the pipeline block being mocked, which selects
            the columns and values to write.

    Returns:
        The same ``sample`` object with the mocked columns set.

    Raises:
        Exception: If ``block_name`` has no mocked behavior defined here.
    """
    if block_name in ("gen_questions", "gen_grounded_questions"):
        sample["question"] = f"Is this a question {_random_string(8)}?"
    elif block_name in ("eval_questions", "eval_grounded_questions"):
        sample["evaluation"] = "This is an evaluation."
        sample["score"] = "1"
    elif block_name in ("gen_responses", "gen_grounded_responses"):
        sample["response"] = "This is a response."
    elif block_name in ("evaluate_qa_pair", "evaluate_grounded_qa_pair"):
        sample["evaluation"] = "This is an evaluation."
        sample["score"] = "2"
    elif block_name == "gen_contexts":
        sample["context"] = f"This is a context {_random_string(8)}."
    elif block_name == "gen_spellcheck":
        # The mocked "corrected" text is simply the input document
        sample["spellcheck"] = sample["document"]
    elif block_name == "gen_knowledge":
        sample["question"] = f"Is this a question {_random_string(8)}?"
        sample["response"] = "This is a response."
    elif block_name == "eval_faithfulness_qa_pair":
        sample["explanation"] = "This is an explanation."
        sample["judgment"] = "YES"
    elif block_name == "eval_relevancy_qa_pair":
        sample["feedback"] = "This is some feedback."
        sample["score"] = "2"
    elif block_name == "eval_verify_question":
        sample["explanation"] = "This is an explanation."
        sample["rating"] = "1"
    else:
        # Fail loudly so a new pipeline block cannot go un-mocked unnoticed
        raise Exception(
            f"Received an un-mocked LLMBlock: {block_name}. Add code in {__file__} to handle this block."
        )
    return sample


class MockLLMBlock(LLMBlock):
    """LLMBlock whose ``generate`` writes canned columns instead of model output."""

    def generate(self, samples: Dataset):
        """Map ``_add_mocked_cols`` over ``samples`` using this block's name."""
        mock_kwargs = {"block_name": self.block_name}
        return samples.map(_add_mocked_cols, fn_kwargs=mock_kwargs)
53 changes: 53 additions & 0 deletions tests/testdata/mock_pipelines/freeform_skills.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Mock pipeline for freeform skills. Every LLM-backed step uses MockLLMBlock
# (tests/mockllmblock.py), which fills the output columns with canned values
# selected by the block name.
version: "1.0"
blocks:
# Step 1: generate questions; de-duplication is requested on `question`.
- name: gen_questions
type: MockLLMBlock
config:
# Points at the real prompt config under src/instructlab/sdg/configs
# (presumably resolved relative to this file -- TODO confirm).
config_path: ../../../src/instructlab/sdg/configs/skills/freeform_questions.yaml
output_cols:
- question
batch_kwargs:
num_samples: 30
drop_duplicates:
- question
# Step 2: score the questions; the mock sets score to "1" for this block name.
- name: eval_questions
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/evaluate_freeform_questions.yaml
output_cols:
- evaluation
- score
# Step 3: filter on score == 1.0 after float conversion, matching the mocked
# "1" above (presumably matching rows are kept -- verify against
# FilterByValueBlock).
- name: filter_questions
type: FilterByValueBlock
config:
filter_column: score
filter_value: 1.0
operation: eq
convert_dtype: float
drop_columns:
- evaluation
- score
- num_samples
# Step 4: generate a response for each remaining question.
- name: gen_responses
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/freeform_responses.yaml
output_cols:
- response
# Step 5: score the question/response pair; the mock sets score to "2" here.
- name: evaluate_qa_pair
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/evaluate_freeform_pair.yaml
output_cols:
- evaluation
- score
# Step 6: filter on score >= 2.0 after float conversion, which the mocked "2"
# satisfies.
- name: filter_qa_pair
type: FilterByValueBlock
config:
filter_column: score
filter_value: 2.0
operation: ge
convert_dtype: float
drop_columns:
- evaluation
- score
70 changes: 70 additions & 0 deletions tests/testdata/mock_pipelines/grounded_skills.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Mock pipeline for grounded skills. Every LLM-backed step uses MockLLMBlock
# (tests/mockllmblock.py), which fills the output columns with canned values
# selected by the block name.
version: "1.0"
blocks:
# Step 1: generate contexts; de-duplication is requested on `context`.
- name: gen_contexts
type: MockLLMBlock
config:
# Points at the real prompt config under src/instructlab/sdg/configs
# (presumably resolved relative to this file -- TODO confirm).
config_path: ../../../src/instructlab/sdg/configs/skills/contexts.yaml
output_cols:
- context
gen_kwargs:
temperature: 0.7
max_tokens: 2048
n: 10
seed: 42
drop_duplicates:
- context
# Step 2: generate questions grounded in the context.
- name: gen_grounded_questions
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/grounded_questions.yaml
output_cols:
- question
batch_kwargs:
num_samples: 3
drop_duplicates:
- question
# Step 3: score the questions; the mock sets score to "1" for this block name.
- name: eval_grounded_questions
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml
output_cols:
- evaluation
- score
# Step 4: filter on score == 1.0 after float conversion, matching the mocked
# "1" above (presumably matching rows are kept -- verify against
# FilterByValueBlock).
- name: filter_grounded_questions
type: FilterByValueBlock
config:
filter_column: score
filter_value: 1.0
operation: eq
convert_dtype: float
drop_columns:
- evaluation
- score
- num_samples
# Step 5: generate a response for each remaining question.
- name: gen_grounded_responses
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/grounded_responses.yaml
output_cols:
- response
# Step 6: score the question/response pair; the mock sets score to "2" here.
- name: evaluate_grounded_qa_pair
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml
output_cols:
- evaluation
- score
# Step 7: filter on score >= 2.0. Unlike the earlier filter in this file, no
# drop_columns are listed for this step.
- name: filter_grounded_qa_pair
type: FilterByValueBlock
config:
filter_column: score
filter_value: 2.0
operation: ge
convert_dtype: float
# Step 8: combine `context` and `question`, writing the result to `question`.
- name: combine_question_and_context
type: CombineColumnsBlock
config:
columns:
- context
- question
output_col: question
113 changes: 113 additions & 0 deletions tests/testdata/mock_pipelines/knowledge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Mock pipeline for knowledge. Every LLM-backed step uses MockLLMBlock
# (tests/mockllmblock.py), which fills the output columns with canned values
# selected by the block name.
version: "1.0"
blocks:
# Copy `document` into `base_document` before the spellcheck step.
- name: duplicate_document_col
type: DuplicateColumnsBlock
config:
columns_map:
document: base_document

# The mock sets `spellcheck` to the unchanged `document` value.
- name: gen_spellcheck
type: MockLLMBlock
config:
# Points at the real prompt config under src/instructlab/sdg/configs
# (presumably resolved relative to this file -- TODO confirm).
config_path: ../../../src/instructlab/sdg/configs/knowledge/spellcheck.yaml
output_cols:
- spellcheck
gen_kwargs:
max_tokens: 2048

# Flatten `spellcheck` and `base_document` into `corrected_document`, with
# the source column name recorded in `dataset_type` (presumably one row per
# source column -- verify against FlattenColumnsBlock).
- name: flatten_auxiliary_columns
type: FlattenColumnsBlock
config:
var_cols:
- spellcheck
- base_document
value_name: corrected_document
var_name: dataset_type

# Rename so `corrected_document` becomes the `document` column used
# downstream; the original `document` is kept as `raw_document`.
- name: rename_to_document_column
type: RenameColumnsBlock
config:
columns_map:
document: raw_document
corrected_document: document

# Generate question/response pairs. NOTE(review): MockLLMBlock overrides
# generate(), so the parser settings below are presumably not exercised by
# the mock -- TODO confirm.
- name: gen_knowledge
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/knowledge/generate_questions_responses.yaml
output_cols:
- question
- response
parser_kwargs:
parser_name: custom
parsing_pattern: '\[(?:Question|QUESTION)\]\s*(.*?)\s*\[(?:Answer|ANSWER)\]\s*(.*?)\s*(?=\[(?:Question|QUESTION)\]|$)'
parser_cleanup_tags:
- "[END]"
- "[End]"
gen_kwargs:
max_tokens: 2048
drop_duplicates:
- question
# Faithfulness check; the mock sets judgment to "YES" for this block name.
- name: eval_faithfulness_qa_pair
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/knowledge/evaluate_faithfulness.yaml
output_cols:
- explanation
- judgment
gen_kwargs:
max_tokens: 2048
# Filter on judgment == "YES", matching the mocked value above.
- name: filter_faithfulness
type: FilterByValueBlock
config:
filter_column: judgment
filter_value: "YES"
operation: eq
drop_columns:
- judgment
- explanation
# Relevancy check; the mock sets score to "2" for this block name.
- name: eval_relevancy_qa_pair
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/knowledge/evaluate_relevancy.yaml
output_cols:
- feedback
- score
gen_kwargs:
max_tokens: 2048
# Filter on score == 2.0 after float conversion, matching the mocked "2".
- name: filter_relevancy
type: FilterByValueBlock
config:
filter_column: score
filter_value: 2.0
operation: eq
convert_dtype: float
drop_columns:
- feedback
- score
# Question verification; the mock sets rating to "1" for this block name.
- name: eval_verify_question
type: MockLLMBlock
config:
config_path: ../../../src/instructlab/sdg/configs/knowledge/evaluate_question.yaml
output_cols:
- explanation
- rating
gen_kwargs:
max_tokens: 2048
# Filter on rating == 1.0 after float conversion, matching the mocked "1".
- name: filter_verify_question
type: FilterByValueBlock
config:
filter_column: rating
filter_value: 1.0
operation: eq
convert_dtype: float
drop_columns:
- explanation
- rating
- __index_level_0__

# Auxiliary instruction texts for the spellcheck dataset, listed under
# `datamixing`.
datamixing:
auxiliary_instructions:
spellcheck:
- Correct any spelling errors in the document and output the corrected version.
- Rewrite the document to remove any spelling errors.

0 comments on commit f84902f

Please sign in to comment.