From 1984d553f630871eea5c9cde753f10b24650cc59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?=
Date: Mon, 15 Jan 2024 15:47:30 +0100
Subject: [PATCH] fix: Make bot speak slower

---
 main.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/main.py b/main.py
index 9749ae31..cd988ee9 100644
--- a/main.py
+++ b/main.py
@@ -5,7 +5,7 @@
     FileSource,
     PhoneNumberIdentifier,
     RecognizeInputType,
-    TextSource,
+    SsmlSource,
 )
 from azure.communication.sms import SmsClient
 from azure.core.credentials import AzureKeyCredential
@@ -910,17 +910,20 @@ async def handle_hangup(client: CallConnectionClient, call: CallModel) -> None:
         _logger.warn(f"Failed SMS to {call.phone_number} ({call.id})", exc_info=True)
 
 
-def audio_from_text(text: str) -> TextSource:
+def audio_from_text(text: str) -> SsmlSource:
+    """
+    Generate an audio source that can be read by Azure Communication Services SDK.
+
+    Text requires to be SVG escaped, and SSML tags are used to control the voice. Plus, text is slowed down by 5% to make it more understandable for elderly people. Text is also truncated to 400 characters, as this is the limit of Azure Communication Services TTS, but a warning is logged.
+    """
+    # Azure Speech Service TTS limit is 400 characters
     if len(text) > 400:
         _logger.warning(
             f"Text is too long to be processed by TTS, truncating to 400 characters, fix this!"
         )
         text = text[:400]
-    return TextSource(
-        source_locale=CONFIG.workflow.conversation_lang,
-        text=text,
-        voice_name=CONFIG.communication_service.voice_name,
-    )
+    ssml = f'{text}'
+    return SsmlSource(ssml_text=ssml)
 
 
 def callback_url(caller_id: str) -> str: