unable to apply transformation: 'headlines' property not found in this node #1775

Z-oo883 · 2024-12-20T03:11:20Z

Your Question
What should I do when I encountered these errors when generating test data in a non English language?

Code Examples

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from ragas.testset.persona import Persona
from ragas.testset.transforms.extractors.llm_based import NERExtractor
from ragas.testset.transforms.splitters import HeadlineSplitter
from ragas.testset import TestsetGenerator
from ragas.testset.synthesizers.single_hop.specific import (
    SingleHopSpecificQuerySynthesizer,
)
import os
import asyncio

os.environ["OPENAI_API_KEY"] = "sk-xxxxxxxx"


# 你的其他导入和环境设置保持不变

async def generate_data():
    path = "D:/ragas"
    loader = DirectoryLoader(path, glob="**/*.md")
    docs = loader.load()
    print(len(docs))

    generator_llm = LangchainLLMWrapper(ChatOpenAI(model="deepseek-chat", base_url="xxxx", api_key="sk-xxxxxxx"))
    embedding_model_name = r"embedding\bge-large-zh-v1.5"
    embedding_model_kwargs = {'device': 'cpu'}
    embedding_encode_kwargs = {'batch_size': 32, 'normalize_embeddings': True}

    embed_model = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs=embedding_model_kwargs,
        encode_kwargs=embedding_encode_kwargs
    )
    generator_embeddings = LangchainEmbeddingsWrapper(embed_model)

    personas = [
        Persona(
            name="好奇的学生",
            role_description="对世界充满好奇并希望更多地了解不同文化和语言的学生",
        ),
    ]

    transforms = [HeadlineSplitter(), NERExtractor()]

    generator = TestsetGenerator(
        llm=generator_llm, embedding_model=generator_embeddings, persona_list=personas
    )

    distribution = [
        (SingleHopSpecificQuerySynthesizer(llm=generator_llm), 1.0),
    ]

    
    for query, _ in distribution:
        prompts = await query.adapt_prompts("chinese", llm=generator_llm)
        query.set_prompts(**prompts)

    dataset = generator.generate_with_langchain_docs(
        docs[:],
        testset_size=5,
        transforms=transforms,
        query_distribution=distribution,
    )
    print(dataset)
    eval_dataset = dataset.to_evaluation_dataset()
    print(eval_dataset)
    df = eval_dataset.to_pandas()
    df.to_csv("ragas_generate_data.csv", encoding='utf-8-sig')


# 使用 asyncio.run 来运行异步函数
if __name__ == "__main__":
    asyncio.run(generate_data())

error

D:\CRAG\ragas_generate_data.py:136: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.
  embed_model = HuggingFaceEmbeddings(
Applying HeadlineSplitter:   0%|          | 0/1 [00:00<?, ?it/s]unable to apply transformation: 'headlines' property not found in this node
Applying NERExtractor:   0%|          | 0/1 [00:00<?, ?it/s]unable to apply transformation: Connection error.
Generating Scenarios: 100%|██████████| 1/1 [00:04<00:00,  4.83s/it]
Generating Samples: 0it [00:00, ?it/s]
Traceback (most recent call last):
  File "D:\CRAG\ragas_generate_data.py", line 180, in <module>
    asyncio.run(generate_data())
  File "D:\anaconda\envs\crag\Lib\site-packages\nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\anaconda\envs\crag\Lib\site-packages\nest_asyncio.py", line 98, in run_until_complete
    return f.result()
           ^^^^^^^^^^
  File "D:\anaconda\envs\crag\Lib\asyncio\futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "D:\anaconda\envs\crag\Lib\asyncio\tasks.py", line 267, in __step
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "D:\CRAG\ragas_generate_data.py", line 171, in generate_data
    print(dataset)
  File "D:\anaconda\envs\crag\Lib\site-packages\ragas\dataset_schema.py", line 277, in __str__
    return f"EvaluationDataset(features={self.features()}, len={len(self.samples)})"
                                         ^^^^^^^^^^^^^^^
  File "D:\anaconda\envs\crag\Lib\site-packages\ragas\dataset_schema.py", line 226, in features
    return self.samples[0].get_features()
           ~~~~~~~~~~~~^^^
IndexError: list index out of range

Process finished with exit code -1073741819 (0xC0000005)

The text was updated successfully, but these errors were encountered:

Z-oo883 added the question Further information is requested label Dec 20, 2024

dosubot bot added bug Something isn't working module-testsetgen Module testset generation labels Dec 20, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

unable to apply transformation: 'headlines' property not found in this node #1775

unable to apply transformation: 'headlines' property not found in this node #1775

Z-oo883 commented Dec 20, 2024 •

edited

Loading

unable to apply transformation: 'headlines' property not found in this node #1775

unable to apply transformation: 'headlines' property not found in this node #1775

Comments

Z-oo883 commented Dec 20, 2024 • edited Loading

Z-oo883 commented Dec 20, 2024 •

edited

Loading