diff --git a/tests/test_chunking_methods.py b/tests/test_chunking_methods.py
index ff21fc5..02c3e17 100644
--- a/tests/test_chunking_methods.py
+++ b/tests/test_chunking_methods.py
@@ -98,9 +98,13 @@ def test_chunk_by_tokens():
         assert end - start <= 10
 
 
-def test_chunk_semantically():
+@pytest.mark.parametrize(
+    'model_name',
+    ['jinaai/jina-embeddings-v2-small-en', 'sentence-transformers/all-MiniLM-L6-v2'],
+)
+def test_chunk_semantically(model_name):
     chunker = Chunker(chunking_strategy="semantic")
-    tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-small-en')
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     tokens = tokenizer.encode_plus(
         EXAMPLE_TEXT_1, add_special_tokens=False, return_offsets_mapping=True
     )
@@ -108,7 +112,7 @@ def test_chunk_semantically():
         EXAMPLE_TEXT_1,
         tokenizer=tokenizer,
         chunking_strategy='semantic',
-        embedding_model_name='jinaai/jina-embeddings-v2-small-en',
+        embedding_model_name=model_name,
     )
 
     # check if it returns boundary cues