From 245c8494e88e5197b10b96fef3361e6771c2cf0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=B0lker=20SI=C4=9EIRCI?= Date: Tue, 14 Jan 2025 15:44:58 +0300 Subject: [PATCH] fix for uploaded audio url for custom element --- .vscode/launch.json | 17 ++++++ .../public/elements/AudioWithTranscript.jsx | 56 ++++++++++++++++--- src/podflix/gui/audio.py | 24 ++++---- .../utils/chainlit_utils/data_layer.py | 29 ++++++++++ src/podflix/utils/chainlit_utils/general.py | 5 ++ 5 files changed, 113 insertions(+), 18 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2cf3923..64b408d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,6 +18,23 @@ "${workspaceFolder}/src/podflix/gui/mock.py" ], }, + { + "name": "Chainlit Audio", + "type": "debugpy", + "request": "launch", + // "program": "${file}", + "console": "integratedTerminal", + "module": "chainlit", + "justMyCode": false, + "args": [ + "run", + "--headless", + "--port", + "5000", + // "${file}", + "${workspaceFolder}/src/podflix/gui/audio.py" + ], + }, { "name": "Debug Current File", "type": "debugpy", diff --git a/configs/chainlit/public/elements/AudioWithTranscript.jsx b/configs/chainlit/public/elements/AudioWithTranscript.jsx index c0358eb..c3dcca5 100644 --- a/configs/chainlit/public/elements/AudioWithTranscript.jsx +++ b/configs/chainlit/public/elements/AudioWithTranscript.jsx @@ -1,5 +1,8 @@ import { Card, CardHeader, CardContent } from "@/components/ui/card" import { ScrollArea } from "@/components/ui/scroll-area" +import { Button } from "@/components/ui/button" +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip" +import { Download } from "lucide-react" import { useRef } from "react" export default function AudioWithTranscript() { @@ -18,17 +21,54 @@ export default function AudioWithTranscript() { return `${mins}:${secs.toString().padStart(2, '0')}`; }; + const handleDownload = () => { + const content = props.segments + .map(segment => 
`[${formatTimestamp(segment.start)}] ${segment.text}`) + .join('\n'); + + const blob = new Blob([content], { type: 'text/plain' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + // Strip the final extension from props.name (e.g. "talk.mp3" -> "talk") before appending .txt + const baseFileName = props.name.replace(/\.[^/.]+$/, ''); + a.download = baseFileName + '.txt'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + }; + return ( - +
+ + + + + + + +

Download the transcript

+
+
+
+
diff --git a/src/podflix/gui/audio.py b/src/podflix/gui/audio.py index 5633bf4..a10479d 100644 --- a/src/podflix/gui/audio.py +++ b/src/podflix/gui/audio.py @@ -6,13 +6,16 @@ from langchain_community.chat_message_histories import ChatMessageHistory from langfuse.callback import CallbackHandler as LangfuseCallbackHandler from literalai.helper import utc_now -from loguru import logger from podflix.env_settings import env_settings from podflix.graph.podcast_rag import compiled_graph -from podflix.utils.chainlit_utils.data_layer import apply_sqlite_data_layer_fixes +from podflix.utils.chainlit_utils.data_layer import ( + apply_sqlite_data_layer_fixes, + get_read_url_of_file, +) from podflix.utils.chainlit_utils.general import ( create_message_history_from_db_thread, + get_current_chainlit_thread_id, set_extra_user_session_params, simple_auth_callback, ) @@ -63,7 +66,7 @@ async def on_chat_start(): accept=["audio/*"], max_files=1, max_size_mb=50, - timeout=180, + timeout=360, ).send() file = files[0] @@ -75,25 +78,26 @@ async def on_chat_start(): cl.user_session.set("audio_text", audio_text) - logger.debug(f"Audio file path: {file.path}") - - # TODO: Fetch the audio url from the db using get_element_url - audio_url = file.path + # NOTE: Workaround to get s3 url of the uploaded file in the current thread + thread_id = get_current_chainlit_thread_id() + audio_url = await get_read_url_of_file(thread_id=thread_id, file_name=file.name) # Create audio element with transcript and segments audio_element = cl.CustomElement( name="AudioWithTranscript", props={ - "audioUrl": audio_url, + "name": file.name, + "url": audio_url, "segments": segments, }, display="side", ) - system_message.content = "Audio transcribed successfully 🎉" system_message.elements.append(audio_element) - system_message.content += "\nAudioWithTranscript" + system_message.content = "Audio transcribed successfully 🎉" + system_message.content += "\nOfficialAudio AudioWithTranscript" + await 
system_message.update() diff --git a/src/podflix/utils/chainlit_utils/data_layer.py b/src/podflix/utils/chainlit_utils/data_layer.py index b016911..ca76e4d 100644 --- a/src/podflix/utils/chainlit_utils/data_layer.py +++ b/src/podflix/utils/chainlit_utils/data_layer.py @@ -9,6 +9,7 @@ import boto3 import chainlit as cl import chainlit.socket +from chainlit.data import get_data_layer from chainlit.data.sql_alchemy import SQLAlchemyDataLayer from chainlit.data.storage_clients.s3 import S3StorageClient from chainlit.element import ElementDict @@ -154,6 +155,34 @@ async def get_element_url( return element_dict.url +async def get_read_url_of_file(thread_id: str, file_name: str) -> str: + """Retrieve the read URL of a file stored under a thread. + + Examples: + >>> thread_id = "thread123" + >>> url = await get_read_url_of_file(thread_id=thread_id, file_name="audio.mp3") + >>> print(url) # URL string + + Args: + thread_id: The unique identifier of the thread containing the file. + file_name: The full name of the file to retrieve, including the file extension. + + Returns: + str: The URL returned by the storage client's ``get_read_url`` for the key ``threads/{thread_id}/files/{file_name}``. + + Raises: + ValueError: If the data layer has no storage client configured. + """ + cl_data_layer = get_data_layer() + + if cl_data_layer.storage_client is None: + raise ValueError("S3 storage client not set in data layer.") + + object_key = f"threads/{thread_id}/files/{file_name}" + + return await cl_data_layer.storage_client.get_read_url(object_key=object_key) + + def apply_sqlite_data_layer_fixes(): """Apply necessary fixes for SQLite data layer configuration. 
diff --git a/src/podflix/utils/chainlit_utils/general.py b/src/podflix/utils/chainlit_utils/general.py index d3cbe37..f218981 100644 --- a/src/podflix/utils/chainlit_utils/general.py +++ b/src/podflix/utils/chainlit_utils/general.py @@ -124,3 +124,8 @@ def set_extra_user_session_params( langfuse_session_url = get_lf_session_url(session_id=session_id) logger.debug(f"Langfuse Session URL: {langfuse_session_url}") + + +def get_current_chainlit_thread_id() -> str: + """Return the thread ID of the current Chainlit session (``cl.context.session.thread_id``).""" + return cl.context.session.thread_id