Skip to content

Commit

Permalink
Change prints in build_chunks_from_docling_json to debug messages
Browse files Browse the repository at this point in the history
Signed-off-by: Khaled Sulayman <[email protected]>
  • Loading branch information
khaledsulayman committed Nov 7, 2024
1 parent 00d4d1f commit d31f130
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/instructlab/sdg/utils/chunkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def build_chunks_from_docling_json(
and len(current_buffer) > 1
):
chunk_text = "\n\n".join(current_buffer[:-1])
-print(
+logger.debug(
f"Current chunk size {self.get_token_count(chunk_text, tokenizer)} and max is {max_token_per_chunk}"
)

Expand All @@ -513,7 +513,7 @@ def build_chunks_from_docling_json(
self.get_token_count(current_buffer[-1], tokenizer)
>= max_token_per_chunk
):
-print(
+logger.debug(
f"This is too big a document to be left in the current buffer {self.get_token_count(current_buffer[-1], tokenizer)}"
)
document_chunks.append(current_buffer[-1])
Expand Down

0 comments on commit d31f130

Please sign in to comment.