From 37fe3a75ff5b262362a322a48dd8a1ac3001083a Mon Sep 17 00:00:00 2001 From: Aakanksha Duggal Date: Thu, 16 Jan 2025 15:19:41 -0500 Subject: [PATCH] Remove the legacy document format to move to docling v2 output Signed-off-by: Aakanksha Duggal --- src/instructlab/sdg/utils/chunkers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/instructlab/sdg/utils/chunkers.py b/src/instructlab/sdg/utils/chunkers.py index 52c15ba1..6783e96a 100644 --- a/src/instructlab/sdg/utils/chunkers.py +++ b/src/instructlab/sdg/utils/chunkers.py @@ -522,11 +522,11 @@ def export_documents(self, converted_docs: Iterable[ConversionResult]): # Export Deep Search document JSON format: with (docling_artifacts_path / f"{doc_filename}.json").open("w") as fp: - fp.write(json.dumps(doc.legacy_document.export_to_dict())) + fp.write(json.dumps(doc.export_to_dict())) # Export Markdown format: with (docling_artifacts_path / f"{doc_filename}.md").open("w") as fp: - fp.write(doc.legacy_document.export_to_markdown()) + fp.write(doc.export_to_markdown()) else: logger.info(f"Document {doc.input.file} failed to convert.") failure_count += 1