Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
clemlesne committed Dec 5, 2024
2 parents 5000b5e + 9819c96 commit 8c9e70d
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 86 deletions.
16 changes: 15 additions & 1 deletion app/helpers/call_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ async def on_call_connected(
) -> None:
logger.info("Call connected, asking for language")
call.recognition_retry = 0 # Reset recognition retry counter
call.in_progress = True
call.messages.append(
MessageModel(
action=MessageActionEnum.CALL,
Expand Down Expand Up @@ -171,6 +172,7 @@ async def on_recognize_error(
call: CallStateModel,
client: CallAutomationClient,
contexts: set[CallContextEnum] | None,
post_callback: Callable[[CallStateModel], Awaitable[None]],
) -> None:
# Retry IVR recognition
if contexts and CallContextEnum.IVR_LANG_SELECT in contexts:
Expand All @@ -190,6 +192,7 @@ async def on_recognize_error(
await _handle_goodbye(
call=call,
client=client,
post_callback=post_callback,
)
return

Expand All @@ -199,6 +202,7 @@ async def on_recognize_error(
await _handle_goodbye(
call=call,
client=client,
post_callback=post_callback,
)
return

Expand All @@ -217,15 +221,24 @@ async def on_recognize_error(
async def _handle_goodbye(
call: CallStateModel,
client: CallAutomationClient,
post_callback: Callable[[CallStateModel], Awaitable[None]],
) -> None:
await handle_play_text(
res = await handle_play_text(
call=call,
client=client,
context=CallContextEnum.GOODBYE,
store=False, # Do not store timeout prompt as it perturbs the LLM and makes it hallucinate
text=await CONFIG.prompts.tts.goodbye(call),
)

if not res:
logger.info("Failed to play goodbye prompt, ending call now")
await _handle_hangup(
call=call,
client=client,
post_callback=post_callback,
)


@tracer.start_as_current_span("on_play_completed")
async def on_play_completed(
Expand Down Expand Up @@ -393,6 +406,7 @@ async def _handle_hangup(
post_callback: Callable[[CallStateModel], Awaitable[None]],
) -> None:
await handle_hangup(client=client, call=call)
call.in_progress = False
call.messages.append(
MessageModel(
action=MessageActionEnum.HANGUP,
Expand Down
104 changes: 46 additions & 58 deletions app/helpers/call_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ async def _timeout_callback() -> None:
call=call,
client=automation_client,
contexts=None,
post_callback=post_callback,
)
)

Expand Down Expand Up @@ -171,7 +172,10 @@ async def _response_callback() -> None:
)
)

# Add recognition to the call history
# Clear the recognition buffer
recognizer_buffer.clear()

    # Store recognition task
nonlocal last_response
last_response = await scheduler.spawn(
_out_answer(
Expand All @@ -183,11 +187,12 @@ async def _response_callback() -> None:
)
)

# Clear the recognition buffer
recognizer_buffer.clear()
# Wait for the response to be processed
await last_response.wait()

await _in_audio(
bits_per_sample=audio_bits_per_sample,
call=call,
channels=audio_channels,
clear_audio_callback=_clear_audio_callback,
in_stream=audio_stream,
Expand Down Expand Up @@ -249,6 +254,7 @@ async def _tts_callback(text: str, style: MessageStyleEnum) -> None:
call=call,
client=client,
post_callback=post_callback,
scheduler=scheduler,
tts_callback=_tts_callback,
use_tools=_iterations_remaining > 0,
)
Expand Down Expand Up @@ -314,20 +320,24 @@ def _clear_tasks() -> None:
)
soft_timeout_triggered = True
# Never store the error message in the call history, it has caused hallucinations in the LLM
await handle_play_text(
call=call,
client=client,
store=False,
text=await CONFIG.prompts.tts.timeout_loading(call),
await scheduler.spawn(
handle_play_text(
call=call,
client=client,
store=False,
text=await CONFIG.prompts.tts.timeout_loading(call),
)
)

elif loading_task.done(): # Do not play timeout prompt plus loading, it can be frustrating for the user
loading_task = _loading_task()
await handle_media(
call=call,
client=client,
sound_url=CONFIG.prompts.sounds.loading(),
) # Play loading sound
await scheduler.spawn(
handle_media(
call=call,
client=client,
sound_url=CONFIG.prompts.sounds.loading(),
)
)

# Wait to not block the event loop for other requests
await asyncio.sleep(1)
Expand Down Expand Up @@ -372,10 +382,11 @@ def _clear_tasks() -> None:

# TODO: Refacto, this function is too long
@tracer.start_as_current_span("call_execute_llm_chat")
async def _execute_llm_chat( # noqa: PLR0911, PLR0912, PLR0915
async def _execute_llm_chat( # noqa: PLR0913, PLR0911, PLR0912, PLR0915
call: CallStateModel,
client: CallAutomationClient,
post_callback: Callable[[CallStateModel], Awaitable[None]],
scheduler: aiojobs.Scheduler,
tts_callback: Callable[[str, MessageStyleEnum], Awaitable[None]],
use_tools: bool,
) -> tuple[bool, bool, CallStateModel]:
Expand Down Expand Up @@ -428,6 +439,7 @@ async def _content_callback(
tts_callback = _tts_callback(
automation_client=client,
call=call,
scheduler=scheduler,
)

# Build plugins
Expand Down Expand Up @@ -552,6 +564,7 @@ async def _content_callback(
# TODO: Refacto and simplify
async def _in_audio( # noqa: PLR0913
bits_per_sample: int,
call: CallStateModel,
channels: int,
clear_audio_callback: Callable[[], Awaitable[None]],
in_stream: asyncio.Queue[bytes],
Expand All @@ -561,59 +574,33 @@ async def _in_audio( # noqa: PLR0913
timeout_callback: Callable[[], Awaitable[None]],
) -> None:
clear_tts_task: asyncio.Task | None = None
no_voice_task: asyncio.Task | None = None
flush_task: asyncio.Task | None = None
vad = Vad(
# Aggressiveness mode (0, 1, 2, or 3)
# Sets the VAD operating mode. A more aggressive (higher mode) VAD is more restrictive in reporting speech. Put in other words the probability of being speech when the VAD returns 1 is increased with increasing mode. As a consequence also the missed detection rate goes up.
mode=3,
)

async def _timeout_callback() -> None:
"""
Alert the user that the call is about to be cut off.
"""
timeout_sec = await phone_silence_timeout_sec()

while True:
logger.debug(
"Wait foor %i sec before cutting off the call",
timeout_sec,
)

# Wait for the timeout
await asyncio.sleep(timeout_sec)

logger.info("Phone silence timeout triggered")

# Execute the callback
await timeout_callback()

async def _flush_callback() -> None:
"""
Flush the audio buffer if no audio is detected for a while.
"""
# Wait for the timeout
nonlocal clear_tts_task

timeout_ms = await vad_silence_timeout_ms()

# Wait for the timeout
await asyncio.sleep(timeout_ms / 1000)

# Cancel the TTS clear task if any
if clear_tts_task:
clear_tts_task.cancel()
clear_tts_task = None

logger.debug("Flushing audio buffer after %i ms", timeout_ms)

# Commit the buffer
await response_callback()

async def _no_voice_callback() -> None:
await asyncio.gather(
_flush_callback(),
_timeout_callback(),
)
# Wait for the timeout, if any
timeout_sec = await phone_silence_timeout_sec()
while call.in_progress:
await asyncio.sleep(timeout_sec)
logger.info("Silence triggered after %i sec", timeout_sec)
await timeout_callback()

async def _clear_tts_callback() -> None:
"""
Expand Down Expand Up @@ -663,17 +650,17 @@ async def _clear_tts_callback() -> None:
):
in_empty = True
# Start timeout if not already started
if not no_voice_task:
no_voice_task = asyncio.create_task(_no_voice_callback())
if not flush_task:
flush_task = asyncio.create_task(_flush_callback())

if in_empty:
# Continue to the next audio packet
continue

# Voice detected, cancel the timeout if any
if no_voice_task:
no_voice_task.cancel()
no_voice_task = None
if flush_task:
flush_task.cancel()
flush_task = None

# Start the TTS clear task
if not clear_tts_task:
Expand All @@ -683,6 +670,7 @@ async def _clear_tts_callback() -> None:
def _tts_callback(
automation_client: CallAutomationClient,
call: CallStateModel,
scheduler: aiojobs.Scheduler,
) -> Callable[[str, MessageStyleEnum], Awaitable[None]]:
"""
Send back the TTS to the user.
Expand All @@ -693,16 +681,16 @@ async def wrapper(
text: str,
style: MessageStyleEnum,
) -> None:
await asyncio.gather(
# First, play the TTS to the user
await scheduler.spawn(
handle_play_text(
call=call,
client=automation_client,
style=style,
text=text,
), # First, play the TTS to the user
_db.call_aset(
call
), # Second, save in DB allowing (1) user to cut off the Assistant and (2) SMS answers to be in order
)
)
# Second, save in DB allowing (1) user to cut off the Assistant and (2) SMS answers to be in order
await _db.call_aset(call)

return wrapper
15 changes: 12 additions & 3 deletions app/helpers/call_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@ async def _handle_play_text(
context: ContextEnum | None,
style: MessageStyleEnum,
text: str,
) -> None:
) -> bool:
"""
Play a text to a call participant.
If `context` is provided, it will be used to track the operation.
Returns `True` if the text was played, `False` otherwise.
"""
logger.info("Playing TTS: %s", text)
try:
Expand All @@ -109,13 +111,15 @@ async def _handle_play_text(
text=text,
),
)
return True
except ResourceNotFoundError:
logger.debug("Call hung up before playing")
except HttpResponseError as e:
if "call already terminated" in e.message.lower():
logger.debug("Call hung up before playing")
else:
raise e
return False


async def handle_media(
Expand Down Expand Up @@ -152,11 +156,13 @@ async def handle_play_text( # noqa: PLR0913
context: ContextEnum | None = None,
store: bool = True,
style: MessageStyleEnum = MessageStyleEnum.NONE,
) -> None:
) -> bool:
"""
Play a text to a call participant.
If `store` is `True`, the text will be stored in the call messages.
Returns `True` if the text was played, `False` otherwise.
"""
# Split text in chunks
chunks = await _chunk_before_tts(
Expand All @@ -168,13 +174,16 @@ async def handle_play_text( # noqa: PLR0913

# Play each chunk
for chunk in chunks:
await _handle_play_text(
res = await _handle_play_text(
call=call,
client=client,
context=context,
style=style,
text=chunk,
)
if not res:
return False
return True


async def handle_clear_queue(
Expand Down
2 changes: 1 addition & 1 deletion app/helpers/config_models/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def _format(
[line.strip() for line in formatted_prompt.splitlines()]
)

self.logger.debug("Formatted prompt: %s", formatted_prompt)
# self.logger.debug("Formatted prompt: %s", formatted_prompt)
return formatted_prompt

def _messages(
Expand Down
1 change: 1 addition & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ async def _communicationservices_event_worker(
call=call,
client=automation_client,
contexts=operation_contexts,
post_callback=_trigger_post_event,
)

case "Microsoft.Communication.PlayCompleted": # Media played
Expand Down
Loading

0 comments on commit 8c9e70d

Please sign in to comment.