Skip to content

Commit

Permalink
add RenderWhatsApp markdown renderer, extract markdown image urls ins…
Browse files Browse the repository at this point in the history
…ide the renderer itself and send over whatsapp
  • Loading branch information
milovate authored and devxpy committed Feb 12, 2025
1 parent bfaabcc commit 19214e4
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 8 deletions.
27 changes: 24 additions & 3 deletions daras_ai/text_format.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import ast

import markdown_it
import markdown_it.presets
import parse
from markdown_it import MarkdownIt

from daras_ai_v2.tts_markdown_renderer import RendererPlain

from daras_ai_v2.wa_markdown_renderer import RendererWhatsApp

input_spec_parse_pattern = "{" * 5 + "}" * 5

Expand Down Expand Up @@ -47,4 +48,24 @@ def format_number_with_suffix(num: int) -> str:

def unmarkdown(text: str) -> str:
    """Convert markdown to plaintext.

    Renders ``text`` through markdown-it using the ``RendererPlain`` renderer
    and returns the rendered string.
    """
    # A single return: the duplicated (unreachable) second return is removed,
    # and the module-qualified constructor matches markdown_to_wa below.
    return markdown_it.MarkdownIt(renderer_cls=RendererPlain).render(text)


def markdown_to_wa(text: str) -> tuple[str, list[str]]:
    """Convert markdown to WhatsApp-formatted text.

    The input is split on ``"\\n"`` and every line is rendered on its own with
    a single shared ``RendererWhatsApp`` instance, so leading whitespace of
    each line can be carried over to the output.

    Returns:
        A ``(rendered_text, image_urls)`` pair, where ``image_urls`` is the
        renderer's ``collected_img_urls`` list.
    """
    parser = markdown_it.MarkdownIt(renderer_cls=RendererWhatsApp)

    def _render_line(line: str) -> str:
        # An empty source line is passed through as a bare newline.
        if not line:
            return "\n"

        stripped = line.lstrip()
        # NOTE: for a whitespace-only line `stripped` is empty, so this slice
        # is `line[:-0]`, i.e. an empty string rather than the whole line.
        indent = line[: -len(stripped)]
        rendered = parser.render(stripped)

        # When nothing was rendered, only the indent is kept and no newline
        # is emitted for this line.
        return (indent + rendered + "\n") if rendered else indent

    pieces = [_render_line(line) for line in text.split("\n")]
    return "".join(pieces), parser.renderer.collected_img_urls
41 changes: 40 additions & 1 deletion daras_ai_v2/facebook_bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from bots.models import BotIntegration, Platform, Conversation
from daras_ai.image_input import upload_file_from_bytes, get_mimetype_from_response
from daras_ai.text_format import markdown_to_wa
from daras_ai_v2 import settings
from daras_ai_v2.asr import (
audio_bytes_to_wav,
Expand All @@ -11,6 +12,8 @@
from daras_ai_v2.exceptions import raise_for_status
from daras_ai_v2.text_splitter import text_splitter

WA_IMG_MAX_SIZE = 5 * 1024**2

WA_MSG_MAX_SIZE = 1024


Expand Down Expand Up @@ -136,7 +139,7 @@ def send_msg_to(
user_number: str,
access_token: str | None = None,
) -> str | None:
# see https://developers.facebook.com/docs/whatsapp/api/messages/media/
text, images = markdown_to_wa(text)

# split text into chunks if too long
if text and len(text) > WA_MSG_MAX_SIZE:
Expand All @@ -163,6 +166,7 @@ def send_msg_to(
access_token=access_token,
)

# see https://developers.facebook.com/docs/whatsapp/api/messages/media/
messages = []
if video:
if buttons:
Expand Down Expand Up @@ -190,6 +194,7 @@ def send_msg_to(
},
},
]

elif buttons:
# interactive text msg
messages = _build_msg_buttons(
Expand Down Expand Up @@ -235,6 +240,14 @@ def send_msg_to(
for link in documents
] + messages

messages += [
{
"type": "image",
"image": {"link": wa_img_convert(img_url)},
}
for img_url in images
]

return send_wa_msgs_raw(
bot_number=bot_number,
user_number=user_number,
Expand Down Expand Up @@ -491,3 +504,29 @@ def send_fb_msgs_raw(
},
)
print("send_fb_msgs_raw:", r.status_code, r.json())


def wa_img_convert(f_url: str) -> str:
    """Return an image URL that satisfies WhatsApp's image constraints.

    The file metadata for ``f_url`` is looked up first. If the image is
    already a PNG/JPEG *and* smaller than ``WA_IMG_MAX_SIZE``, the original
    URL is returned untouched. Otherwise the image is downloaded, re-encoded
    as JPEG (with a size cap when the download exceeds the limit), uploaded,
    and the URL returned by ``upload_file_from_bytes`` is returned.

    Args:
        f_url: URL of the source image.

    Returns:
        Either ``f_url`` itself or the URL of the re-encoded JPEG upload.
    """
    from wand.image import Image
    from daras_ai_v2.vector_search import download_content_bytes
    from daras_ai_v2.vector_search import doc_url_to_file_metadata

    # check for mime type and size from metadata because whatsapp allows max 5MB png/jpeg
    # https://developers.facebook.com/docs/whatsapp/cloud-api/reference/media/#image
    metadata = doc_url_to_file_metadata(f_url)
    # Both constraints must hold to send the URL as-is: with `or`, an oversized
    # png/jpeg or a small image of another type would skip conversion.
    if (
        metadata.mime_type in {"image/png", "image/jpeg"}
        and metadata.total_bytes < WA_IMG_MAX_SIZE
    ):
        return f_url

    # The mime type returned alongside the bytes is not needed: the output is
    # always re-encoded as JPEG below.
    f_bytes, _ = download_content_bytes(
        f_url=f_url,
        mime_type=metadata.mime_type,
        export_links=metadata.export_links,
    )
    with Image(blob=f_bytes) as img:
        if len(f_bytes) > WA_IMG_MAX_SIZE:
            # ask the JPEG encoder to target an output size within the limit
            img.options["jpeg:extent"] = "5MB"
        f_bytes = img.make_blob(format="jpeg")
    return upload_file_from_bytes(metadata.name + ".jpeg", f_bytes, "image/jpeg")
2 changes: 1 addition & 1 deletion daras_ai_v2/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def do_check_document_updates(
metadatas = yield from apply_parallel(
doc_or_yt_url_to_file_metas,
lookups.keys(),
message="Fetching latest knowlege docs...",
message="Fetching latest knowledge docs...",
max_workers=100,
)

Expand Down
88 changes: 88 additions & 0 deletions daras_ai_v2/wa_markdown_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import markdown_it.renderer


class RendererWhatsApp(markdown_it.renderer.RendererProtocol):
    """markdown-it renderer that emits WhatsApp-style text markers.

    Each non-inline token is dispatched to the method named after its
    ``token.type``; token types without such a method contribute their raw
    ``token.content``. Images produce no text: their http(s) URLs are
    accumulated in ``collected_img_urls`` instead.
    """

    __output__ = "WhatsApp"

    # whatsapp only allows 1 star for bolding, so nested strong/heading spans
    # are depth-counted and only the outermost pair emits "*"
    _strong_level = 0

    # href captured by link_open, appended to the output by link_close
    current_link = None

    def __init__(self, parser=None):
        self.collected_img_urls = []

        # headings are rendered the same way as bold text
        self.heading_open = self.strong_open
        self.heading_close = self.strong_close

        # fenced code is handled by the same method as code blocks
        self.fence = self.code_block

    def render(self, tokens, options, env):
        """Render ``tokens`` to a string, recursing into inline children."""
        parts = []
        for token in tokens:
            if token.type == "inline":
                # inline containers contribute only through their children
                if token.children:
                    parts.append(self.render(token.children, options, env))
                continue

            handler = getattr(self, token.type, None)
            parts.append(handler(token) if handler else token.content)
        return "".join(parts)

    def strong_open(self, token):
        self._strong_level += 1
        return "*" if self._strong_level == 1 else ""

    def strong_close(self, token):
        self._strong_level -= 1
        return "*" if self._strong_level == 0 else ""

    def em_open(self, token):
        return "_"

    def em_close(self, token):
        return "_"

    def s_open(self, token):
        return "~"

    def s_close(self, token):
        return "~"

    def blockquote_open(self, token):
        return "> "

    def code_inline(self, token):
        return f"`{token.content}`"

    def code_block(self, token):
        # only the fence marker is emitted; token.content is not included
        return "```"

    def list_item_open(self, token):
        # a non-empty token.info is used as the item's number prefix
        return f"{token.info}. " if token.info else "- "

    def link_open(self, token):
        # remember the target so link_close can print it after the link text
        self.current_link = token.attrs.get("href")
        return ""

    def link_close(self, token):
        return f" ({self.current_link})" if self.current_link else ""

    def image(self, token):
        src = token.attrs.get("src")
        if src and src.startswith("http"):  # ignore local images
            self.collected_img_urls.append(src)
        return ""
9 changes: 6 additions & 3 deletions recipes/VideoBots.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,9 +751,12 @@ def render_steps(self):
gui.json(references)

if gui.session_state.get("functions"):
prompt_funcs = list(
get_tools_from_state(gui.session_state, FunctionTrigger.prompt)
)
try:
prompt_funcs = list(
get_tools_from_state(gui.session_state, FunctionTrigger.prompt)
)
except:
prompt_funcs = None
if prompt_funcs:
gui.write(f"🧩 `{FunctionTrigger.prompt.name} functions`")
for tool in prompt_funcs:
Expand Down

0 comments on commit 19214e4

Please sign in to comment.