diff --git a/README.md b/README.md
index 44941f6..a1a6be7 100644
--- a/README.md
+++ b/README.md
@@ -498,7 +498,7 @@ Conversation options are represented as features. They can be configured from Ap
 | `recognition_retry_max` | The maximum number of retries for voice recognition. | `int` | 2 |
 | `recording_enabled` | Whether call recording is enabled. | `bool` | false |
 | `slow_llm_for_chat` | Whether to use the slow LLM for chat. | `bool` | false |
-| `vad_cutoff_timeout_ms` | The cutoff timeout for voice activity detection in secs. | `int` | 150 |
+| `vad_cutoff_timeout_ms` | The cutoff timeout for voice activity detection in secs. | `int` | 250 |
 | `vad_silence_timeout_ms` | The timeout for phone silence in secs. | `int` | 500 |
 | `vad_threshold` | The threshold for voice activity detection. | `float` | 0.5 |
diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index 07c004e..26974c6 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -178,7 +178,7 @@ async def _stop_callback() -> None:

     async def _commit_answer(
         wait: bool,
-        tool_blacklist: set[str] | None = None,
+        tool_blacklist: set[str] = set(),
     ) -> None:
         """
         Process the response.
@@ -289,9 +289,9 @@ async def _continue_chat(  # noqa: PLR0915, PLR0913
     client: CallAutomationClient,
     post_callback: Callable[[CallStateModel], Awaitable[None]],
     scheduler: Scheduler,
-    tool_blacklist: set[str] | None,
     training_callback: Callable[[CallStateModel], Awaitable[None]],
     tts_client: SpeechSynthesizer,
+    tool_blacklist: set[str] = set(),
     _iterations_remaining: int = 3,
 ) -> CallStateModel:
     """
@@ -482,7 +482,7 @@ async def _generate_chat_completion(  # noqa: PLR0913, PLR0911, PLR0912, PLR0915
     client: CallAutomationClient,
     post_callback: Callable[[CallStateModel], Awaitable[None]],
     scheduler: Scheduler,
-    tool_blacklist: set[str] | None,
+    tool_blacklist: set[str],
     tts_callback: Callable[[str, MessageStyleEnum], Awaitable[None]],
     tts_client: SpeechSynthesizer,
     use_tools: bool,
@@ -539,7 +539,7 @@ async def _content_callback(buffer: str) -> None:
     if not use_tools:
         logger.warning("Tools disabled for this chat")
     else:
-        tools = await plugins.to_openai(tool_blacklist)
+        tools = await plugins.to_openai(frozenset(tool_blacklist))
         # logger.debug("Tools: %s", tools)

     # Execute LLM inference
@@ -626,7 +626,13 @@ async def _content_callback(buffer: str) -> None:
         scheduler=scheduler,
     ):
         await asyncio.gather(
-            *[plugins.execute_tool(tool_call) for tool_call in tool_calls]
+            *[
+                plugins.execute(
+                    blacklist=tool_blacklist,
+                    tool=tool_call,
+                )
+                for tool_call in tool_calls
+            ]
         )

     # Update call model if object reference changed
diff --git a/app/helpers/config_models/prompts.py b/app/helpers/config_models/prompts.py
index 56b1e61..1014960 100644
--- a/app/helpers/config_models/prompts.py
+++ b/app/helpers/config_models/prompts.py
@@ -62,7 +62,8 @@ class LlmModel(BaseModel):
         - Provide a clear and concise summary of the conversation at the beginning of each call
         - Respond only if it is related to the objective or the claim
         - To list things, use bullet points or numbered lists
-        - Use a lot of discourse markers, fillers, to make the conversation human-like
+        - Use a lot of discourse markers, fillers, to make the conversation human-like (e.g., "Well, let me think...", "So, what I can do for you is...", "I see, you are in Paris...")
+        - Use short sentences and simple words
         - Use tools as often as possible and describe the actions you take
         - When the customer says a word and then spells out letters, this means that the word is written in the way the customer spelled it (e.g., "I live in Paris PARIS" -> "Paris", "My name is John JOHN" -> "John", "My email is Clemence CLEMENCE at gmail dot com" -> "clemence@gmail.com")
         - Work for {bot_company}, not someone else
@@ -147,7 +148,7 @@ class LlmModel(BaseModel):
         Conversation objective: Fill the claim with the customer. Claim is about a car accident.
         User: action=talk I had an accident this morning, I was shopping. Let me send the exact location by SMS.
         User: action=sms At the corner of Rue de la Paix and Rue de Rivoli.
-        Tools: update incident location,n
+        Tools: update incident location
         Assistant: style=sad I get it, you had an accident this morning while shopping. style=none I have updated your file with the location you sent me by SMS. style=cheerful Is it correct?

         ## Example 7
diff --git a/app/helpers/features.py b/app/helpers/features.py
index d9b4f8e..66497e9 100644
--- a/app/helpers/features.py
+++ b/app/helpers/features.py
@@ -72,7 +72,7 @@ async def vad_silence_timeout_ms(scheduler: Scheduler) -> int:

 async def vad_cutoff_timeout_ms(scheduler: Scheduler) -> int:
     return await _default(
-        default=150,
+        default=250,
         key="vad_cutoff_timeout_ms",
         scheduler=scheduler,
         type_res=int,
diff --git a/app/helpers/llm_tools.py b/app/helpers/llm_tools.py
index 8a45275..79d4f75 100644
--- a/app/helpers/llm_tools.py
+++ b/app/helpers/llm_tools.py
@@ -9,7 +9,7 @@
     handle_transfer,
 )
 from app.helpers.config import CONFIG
-from app.helpers.llm_utils import AbstractPlugin
+from app.helpers.llm_utils import AbstractPlugin, add_customer_response
 from app.helpers.logging import logger
 from app.models.call import CallStateModel
 from app.models.message import (
@@ -31,6 +31,7 @@ class UpdateClaimDict(TypedDict):


 class DefaultPlugin(AbstractPlugin):
+    # No customer response, we have a pre-defined response
     async def end_call(self) -> str:
         """
         Use this if the customer said they want to end the call.
@@ -58,22 +59,14 @@ async def end_call(self) -> str:
         )
         return "Call ended"

+    @add_customer_response(
+        [
+            "I'am creating it right now.",
+            "We'll start a case.",
+        ]
+    )
     async def new_claim(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "I'am creating it right now."
-            - "We'll start a case."
-            """,
-        ],
     ) -> str:
         """
         Use this if the customer wants to create a new claim.
@@ -90,9 +83,6 @@ async def new_claim(
         - Customer wants explicitely to create a new claim
         - Talking about a totally different subject
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
-
         # Launch post-call intelligence for the current call
         await self.post_callback(self.call)

@@ -116,23 +106,15 @@ async def new_claim(
         )
         return "Claim, reminders and messages reset"

+    @add_customer_response(
+        [
+            "A todo for next week is planned.",
+            "I'm creating a reminder for the company to manage this for you.",
+            "The rendez-vous is scheduled for tomorrow.",
+        ]
+    )
     async def new_or_updated_reminder(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "A todo for next week is planned."
-            - "I'm creating a reminder for the company to manage this for you."
-            - "The rendez-vous is scheduled for tomorrow."
-            """,
-        ],
         description: Annotated[
             str,
             "Description of the reminder, in English. Should be detailed enough to be understood by anyone. Example: 'Call back customer to get more details about the accident', 'Send analysis report to the customer'.",
@@ -167,9 +149,6 @@ async def new_or_updated_reminder(
         - Call back for a follow-up
         - Wait for customer to send a document
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
-
         # Check if reminder already exists, if so update it
         for reminder in self.call.reminders:
             if reminder.title == title:
@@ -194,23 +173,15 @@ async def new_or_updated_reminder(
         except ValidationError as e:
             return f'Failed to create reminder "{title}": {e.json()}'

+    @add_customer_response(
+        [
+            "I am updating the claim with your new address.",
+            "The phone number is now stored in the case.",
+            "Your birthdate is written down.",
+        ]
+    )
     async def updated_claim(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "I am updating the claim with your new address."
-            - "The phone number is now stored in the case."
-            - "Your birthdate is written down."
-            """,
-        ],
         updates: Annotated[
             list[UpdateClaimDict],
             """
@@ -255,9 +226,6 @@ async def updated_claim(
         - Store details about the conversation
         - Update the claim with a new phone number
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
-
         # Update all claim fields
         res = "# Updated fields"
         for field in updates:
@@ -279,6 +247,13 @@ def _update_claim_field(self, update: UpdateClaimDict) -> str:
             self.call.claim[field] = old_value
             return f'Failed to edit field "{field}": {e.json()}'

+    @add_customer_response(
+        [
+            "Connecting you to a human agent.",
+            "I'm calling a human to help you.",
+            "Transfer to a human agent in progress.",
+        ]
+    )
     async def talk_to_human(self) -> str:
         """
         Use this if the customer wants to talk to a human and Assistant is unable to help.
@@ -311,24 +286,16 @@ async def talk_to_human(self) -> str:
         )
         return "Transferring to human agent"

+    @add_customer_response(
+        [
+            "I am looking for the article about the new law on cyber security.",
+            "I am looking in our database for your car insurance contract.",
+            "I am searching for the procedure to declare a stolen luxury watch.",
+            "I'm looking for this document in our database.",
+        ]
+    )
     async def search_document(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "I am looking for the article about the new law on cyber security."
-            - "I am looking in our database for your car insurance contract."
-            - "I am searching for the procedure to declare a stolen luxury watch."
-            - "I'm looking for this document in our database."
-            """,
-        ],
         queries: Annotated[
             list[str],
             "The text queries to perform the search, in English. Example: ['How much does it cost to repair a broken window?', 'What are the requirements to ask for a cyber attack insurance?']",
@@ -349,9 +316,6 @@ async def search_document(
         - Know the procedure to declare a stolen luxury watch
         - Understand the requirements to ask for a cyber attack insurance
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
-
         # Execute in parallel
         tasks = await asyncio.gather(
             *[
@@ -377,23 +341,15 @@ async def search_document(
             res += f"\n{trainings_str}"
         return res

+    @add_customer_response(
+        [
+            "I am calling the firefighters to help you with the fire.",
+            "I am notifying the emergency services right now.",
+            "The pharmacy is notified for the emergency.",
+        ]
+    )
     async def notify_emergencies(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - 'I am calling the firefighters to help you with the fire.'
-            - 'I am contacting the police for the accident with your neighbor.'
-            - 'I am notifying the emergency services right now.'
-            """,
-        ],
         reason: Annotated[
             str,
             "The reason to notify the emergency services. Should be detailed enough to be understood by anyone. Example: 'A person is having a heart attack', 'A child is being attacked by a dog'.",
@@ -426,8 +382,6 @@ async def notify_emergencies(
         - A neighbor is having a heart attack
         - Someons is stuck in a car accident
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
         # TODO: Implement notification to emergency services for production usage
         logger.info(
             "Notifying %s, location %s, contact %s, reason %s",
@@ -438,24 +392,16 @@ async def notify_emergencies(
         )
         return f"Notifying {service} for {reason}"

+    @add_customer_response(
+        [
+            "I am sending a SMS to your phone number.",
+            "I am texting you the information right now.",
+            "I'am sending it.",
+            "SMS with the details is sent.",
+        ]
+    )
     async def send_sms(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "I am sending a SMS to your phone number."
-            - "I am texting you the information right now."
-            - "I'am sending it."
-            - "SMS with the details is sent."
-            """,
-        ],
         message: Annotated[
             str,
             "The message to send to the customer.",
@@ -469,9 +415,6 @@ async def send_sms(
         - Confirm a detail like a reference number, if there is a misunderstanding
         - Send a confirmation, if the customer wants to have a written proof
         """
-        # Customer confirmation
-        await self.tts_callback(customer_response)
-
         # Send SMS
         success = await _sms.send(
             content=message,
@@ -490,23 +433,16 @@ async def send_sms(
         )
         return "SMS sent"

+    @add_customer_response(
+        [
+            "I am slowing down the speech.",
+            "Is it better now that I am speaking slower?",
+            "My voice is now faster.",
+        ],
+        before=False,  # Speak after the speed change
+    )
     async def speech_speed(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - "I am slowing down the speech."
-            - "Is it better now that I am speaking slower?"
-            - "My voice is now faster."
-            """,
-        ],
         speed: Annotated[
             float,
             "The new speed of the voice. Should be between 0.75 and 1.25, where 1.0 is the normal speed.",
@@ -530,29 +466,19 @@ async def speech_speed(
         initial_speed = self.call.initiate.prosody_rate
         self.call.initiate.prosody_rate = speed

-        # Customer confirmation (with new speed)
-        await self.tts_callback(customer_response)
-
         # LLM confirmation
         return f"Voice speed set to {speed} (was {initial_speed})"

+    @add_customer_response(
+        [
+            "For de-DE, 'Ich spreche jetzt auf Deutsch.'",
+            "For en-ES, 'Espero que me entiendas mejor en español.'",
+            "For fr-FR, 'Cela devrait être mieux en français.'",
+        ],
+        before=False,  # Speak after the language change
+    )
     async def speech_lang(
         self,
-        customer_response: Annotated[
-            str,
-            """
-            Phrase used to confirm the update, in the new selected language. This phrase will be spoken to the user.
-
-            # Rules
-            - Action should be rephrased in the present tense
-            - Must be in a single sentence
-
-            # Examples
-            - For de-DE, "Ich spreche jetzt auf Deutsch."
-            - For en-ES, "Espero que me entiendas mejor en español."
-            - For fr-FR, "Cela devrait être mieux en français."
-            """,
-        ],
         lang: Annotated[
             str,
             """
@@ -596,8 +522,5 @@ async def speech_lang(
         initial_lang = self.call.lang.short_code
         self.call.lang = lang

-        # Customer confirmation (with new language)
-        await self.tts_callback(customer_response)
-
         # LLM confirmation
         return f"Voice language set to {lang} (was {initial_lang})"
diff --git a/app/helpers/llm_utils.py b/app/helpers/llm_utils.py
index 128ff64..9c5ba80 100644
--- a/app/helpers/llm_utils.py
+++ b/app/helpers/llm_utils.py
@@ -7,9 +7,10 @@
 import inspect
 import json
 from collections.abc import Awaitable, Callable
-from functools import cache
+from functools import cache, wraps
 from inspect import getmembers, isfunction
 from textwrap import dedent
+from types import FunctionType
 from typing import Annotated, Any, ForwardRef, TypeVar

 from aiojobs import Scheduler
@@ -25,6 +26,7 @@
 from pydantic._internal._typing_extra import eval_type_lenient
 from pydantic.json_schema import JsonSchemaValue

+from app.helpers.cache import async_lru_cache
 from app.helpers.logging import logger
 from app.helpers.monitoring import SpanAttributes, span_attribute, tracer
 from app.models.call import CallStateModel
@@ -71,26 +73,28 @@ def __init__(  # noqa: PLR0913
         self.tts_callback = tts_callback
         self.tts_client = tts_client

+    @async_lru_cache()
     async def to_openai(
         self,
-        blacklist: set[str] | None,
+        blacklist: frozenset[str],
     ) -> list[ChatCompletionToolParam]:
         """
         Get the OpenAI SDK schema for all functions of the plugin, excluding the ones in the blacklist.
         """
+        functions = self._available_functions(frozenset(blacklist))
         return await asyncio.gather(
-            *[
-                _function_schema(arg_type, call=self.call)
-                for name, arg_type in getmembers(self.__class__, isfunction)
-                if not name.startswith("_")
-                and name != "to_openai"
-                and name not in (blacklist or set())
-            ]
+            *[_function_schema(func, call=self.call) for func in functions]
         )

-    @tracer.start_as_current_span("plugin_execute_tool")
-    async def execute_tool(self, tool: ToolModel) -> None:
-        functions = self._available_functions()
+    @tracer.start_as_current_span("plugin_execute")
+    async def execute(
+        self,
+        tool: ToolModel,
+        blacklist: set[str],
+    ) -> None:
+        functions = [
+            func.__name__ for func in self._available_functions(frozenset(blacklist))
+        ]

         json_str = tool.function_arguments
         name = tool.function_name
@@ -138,13 +142,8 @@ async def execute_tool(self, tool: ToolModel) -> None:
             logger.info("Executed function %s (%s): %s", name, args, res_log)

         # Catch wrong arguments
-        except TypeError as e:
-            logger.warning(
-                "Wrong arguments for function %s: %s. Error: %s",
-                name,
-                args,
-                e,
-            )
+        except TypeError:
+            logger.exception("Wrong arguments for function %s: %s.", name, args)
             res = "Wrong arguments, please fix them and try again."
             res_log = res

@@ -164,15 +163,100 @@
         span_attribute(SpanAttributes.TOOL_RESULT, tool.content)

     @cache
-    def _available_functions(self) -> list[str]:
+    def _available_functions(
+        self,
+        blacklist: frozenset[str],
+    ) -> list[FunctionType]:
         """
         List all available functions of the plugin, including the inherited ones.
         """
-        return [name for name, _ in getmembers(self.__class__, isfunction)]
+        return [
+            func
+            for name, func in getmembers(self.__class__, isfunction)
+            if not name.startswith("_")
+            and name not in [func.__name__ for func in [self.to_openai, self.execute]]
+            and name not in blacklist
+        ]
+
+
+def add_customer_response(
+    response_examples: list[str],
+    before: bool = True,
+):
+    """
+    Decorator to add a customer response to a tool.
+
+    Examples are used to generate the tool prompt.
+
+    Example:
+
+    ```python
+    @add_customer_response(
+        response_examples=[
+            "I updated the contact information.",
+            "I changed the address.",
+        ],
+    )
+    async def update_contact_information(...) -> str:
+        # ...
+        return "Contact information updated."
+    """
+
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(
+            self: AbstractPlugin,
+            *args,
+            customer_response: str,
+            **kwargs,
+        ):
+            # If before, execute all in parallel
+            if before:
+                _, res = await asyncio.gather(
+                    self.tts_callback(customer_response),
+                    func(self, *args, **kwargs),
+                )
+
+            # If after, call context should change, so execute sequentially
+            else:
+                res = await func(self, *args, **kwargs)
+                await self.tts_callback(customer_response)
+
+            return res
+
+        # Update the signature of the function
+        func.__signature__ = inspect.signature(func).replace(
+            parameters=[
+                *inspect.signature(func).parameters.values(),
+                inspect.Parameter(
+                    kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                    name="customer_response",
+                    annotation=Annotated[
+                        str,
+                        f"""
+                        Phrase used to confirm the update, in the same language as the customer. This phrase will be spoken to the user.
+
+                        # Rules
+                        - Action should be rephrased in the present tense
+                        - Must be in a single short sentence
+                        - Use simple language
+
+                        # Examples
+                        {"\n- ".join(response_examples)}
+                        """,
+                    ],
+                ),
+            ]
+        )
+
+        return wrapper
+
+    return decorator


 async def _function_schema(
-    f: Callable[..., Any], **kwargs: Any
+    f: Callable[..., Any],
+    **kwargs: Any,
 ) -> ChatCompletionToolParam:
     """
     Take a function and return a JSON schema for it as defined by the OpenAI API.
diff --git a/cicd/bicep/app.bicep b/cicd/bicep/app.bicep
index 6389ac3..c232a86 100644
--- a/cicd/bicep/app.bicep
+++ b/cicd/bicep/app.bicep
@@ -907,7 +907,7 @@ resource configValues 'Microsoft.AppConfiguration/configurationStores/keyValues@
       recognition_retry_max: 2
       recording_enabled: false
       slow_llm_for_chat: false
-      vad_cutoff_timeout_ms: 150
+      vad_cutoff_timeout_ms: 250
       vad_silence_timeout_ms: 500
       vad_threshold: '0.5'
     }): {
diff --git a/tests/conftest.py b/tests/conftest.py
index 839f4b5..c8e54d8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,6 +7,7 @@
 from typing import Any

 import pytest
+import pytest_asyncio
 import yaml
 from _pytest.mark.structures import MarkDecorator
 from azure.cognitiveservices.speech import (
@@ -18,7 +19,9 @@
 from azure.communication.callautomation import FileSource, SsmlSource, TextSource
 from azure.communication.callautomation._generated.aio.operations import (
     CallMediaOperations,
+    CallRecordingOperations,
 )
+from azure.communication.callautomation._generated.models import RecordingStateResponse
 from azure.communication.callautomation._models import TransferCallResult
 from azure.communication.callautomation.aio import (
     CallAutomationClient,
@@ -48,6 +51,18 @@ async def start_media_streaming(
         pass


+class CallRecordingOperationsMock(CallRecordingOperations):
+    def __init__(self) -> None:
+        pass
+
+    async def start_recording(
+        self,
+        *args,  # noqa: ARG002
+        **kwargs,  # noqa: ARG002
+    ) -> RecordingStateResponse:
+        return RecordingStateResponse()
+
+
 class CallConnectionClientMock(CallConnectionClient):
     _call_connection_id: str = "dummy"
     _call_media_client = CallMediaOperationsMock()
@@ -125,6 +140,7 @@ def _log_media(self, play_source: FileSource | TextSource | SsmlSource) -> None:

 class CallAutomationClientMock(CallAutomationClient):
     _call_client: CallConnectionClientMock
+    _call_recording_client = CallRecordingOperationsMock()

     def __init__(
         self,
@@ -152,13 +168,13 @@ class SpeechSynthesizerMock(SpeechSynthesizer):
     def __init__(self, play_media_callback: Callable[[str], None]) -> None:
         self._play_media_callback = play_media_callback

-    def speak_text_async(
+    def speak_ssml_async(
         self,
-        text: str,
+        ssml: str,
         *args,  # noqa: ARG002
         **kwargs,  # noqa: ARG002
     ) -> ResultFuture:
-        self._play_media_callback(text)
+        self._play_media_callback(ssml)
         return ResultFuture(
             async_handle=_spx_handle(0),
             get_function=lambda _: _spx_handle(0),
@@ -291,14 +307,17 @@ def random_text() -> str:
     return text


-@pytest.fixture
-def call() -> CallStateModel:
-    call = CallStateModel(
-        initiate=CallInitiateModel(
-            **CONFIG.conversation.initiate.model_dump(),
-            phone_number="+33612345678",  # pyright: ignore
-        ),
-        voice_id="dummy",
+@pytest_asyncio.fixture
+async def call() -> CallStateModel:
+    db = CONFIG.database.instance()
+    call = await db.call_create(
+        CallStateModel(
+            initiate=CallInitiateModel(
+                **CONFIG.conversation.initiate.model_dump(),
+                phone_number="+33612345678",  # pyright: ignore
+            ),
+            voice_id="dummy",
+        )
     )
     return call

diff --git a/tests/llm.py b/tests/llm.py
index 695b929..0dc17c2 100644
--- a/tests/llm.py
+++ b/tests/llm.py
@@ -322,7 +322,6 @@ async def _training_callback(_call: CallStateModel) -> None:
         client=automation_client,
         post_callback=_post_callback,
         scheduler=scheduler,
-        tool_blacklist=None,
         training_callback=_training_callback,
         tts_client=tts_client,
     )
diff --git a/tests/local.py b/tests/local.py
index 6a0ad05..90fe63a 100644
--- a/tests/local.py
+++ b/tests/local.py
@@ -105,7 +105,6 @@ async def _training_callback(_call: CallStateModel) -> None:
         client=automation_client,
         post_callback=_post_callback,
         scheduler=scheduler,
-        tool_blacklist=None,
         training_callback=_training_callback,
         tts_client=tts_client,
     )
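
For reference, a minimal sketch of how a plugin tool can adopt the `add_customer_response` decorator introduced in `app/helpers/llm_utils.py` above. The plugin class, method, and argument names (`ExamplePlugin`, `check_claim_status`, `claim_id`) are hypothetical; only `AbstractPlugin`, `add_customer_response`, and the injected `customer_response` parameter come from this diff.

```python
from typing import Annotated

from app.helpers.llm_utils import AbstractPlugin, add_customer_response


class ExamplePlugin(AbstractPlugin):
    @add_customer_response(
        [
            "I am checking the status of your claim.",
            "Let me look that up for you.",
        ]
    )
    async def check_claim_status(
        self,
        claim_id: Annotated[str, "Identifier of the claim to look up."],
    ) -> str:
        """
        Use this if the customer asks for the status of an existing claim.
        """
        # With the default before=True, the decorator speaks the LLM-generated
        # confirmation phrase (guided by the examples above) via tts_callback
        # in parallel with this body, then returns the string below to the LLM.
        return f"Claim {claim_id} is in progress"
```

The confirmation examples now live next to the decorator instead of being repeated in every `customer_response` docstring, which is what the `app/helpers/llm_tools.py` changes above rely on.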