From 19eb8ce900c9cb664af817ece7cc1593cb05d00c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:15:50 +0100
Subject: [PATCH 01/12] chore: Delete dead code

---
 app/helpers/call_utils.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/app/helpers/call_utils.py b/app/helpers/call_utils.py
index 67e0533..8d12c20 100644
--- a/app/helpers/call_utils.py
+++ b/app/helpers/call_utils.py
@@ -84,14 +84,6 @@ def write(self, audio_buffer: memoryview) -> int:
         self.queue.put_nowait(audio_buffer.tobytes())
         return audio_buffer.nbytes
 
-    def close(self) -> None:
-        """
-        Close the callback.
-        """
-        while not self.queue.empty():
-            self.queue.get_nowait()
-            self.queue.task_done()
-
 
 class ContextEnum(str, Enum):
     """

From f27e24a5e549b3dbcec32c7a33c4f22ee85c335d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:16:14 +0100
Subject: [PATCH 02/12] fix: Assistant stops when user speaks

---
 app/helpers/call_llm.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index 44b5489..ff77234 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -167,6 +167,11 @@ async def _stop_callback() -> None:
             stt_buffer.clear()
             stt_complete_gate.clear()
 
+            # Clear the audio buffer
+            while not audio_out.empty():
+                audio_out.get_nowait()
+                audio_out.task_done()
+
             # Send a stop signal
             await audio_out.put(False)
 

From c252d9380e1c78c298eee59eacb7ea5b54fcbad7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:18:07 +0100
Subject: [PATCH 03/12] fix: Cut voice in the first seconds

---
 app/helpers/call_llm.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index ff77234..5cf08ea 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -175,9 +175,14 @@ async def _stop_callback() -> None:
             # Send a stop signal
             await audio_out.put(False)
 
-        async def _commit_answer(tool_blacklist: set[str] | None = None) -> None:
+        async def _commit_answer(
+            wait: bool,
+            tool_blacklist: set[str] | None = None,
+        ) -> None:
             """
             Process the response.
+
+            Start the chat task and wait for its response if needed. Job is stored in `last_response` shared variable.
             """
             # Stop any previous response
             await _stop_callback()
@@ -197,7 +202,8 @@ async def _commit_answer(tool_blacklist: set[str] | None = None) -> None:
             )
 
             # Wait for its response
-            await last_response.wait()
+            if wait:
+                await last_response.wait()
 
         async def _response_callback(_retry: bool = False) -> None:
             """
@@ -240,7 +246,7 @@ async def _response_callback(_retry: bool = False) -> None:
             stt_buffer.clear()
 
             # Process the response
-            await _commit_answer()
+            await _commit_answer(wait=True)
 
         # First call
         if len(call.messages) <= 1:
@@ -255,7 +261,8 @@ async def _response_callback(_retry: bool = False) -> None:
         else:
             # Welcome with the LLM, do not use the end call tool for the first message, LLM hallucinates it and this is extremely frustrating for the user
             await _commit_answer(
-                {"end_call"},
+                tool_blacklist={"end_call"},
+                wait=False,
             )
 
         await asyncio.gather(

From bcf0a529c5e24a09c9b9246b7313bffbf65f5cfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:18:30 +0100
Subject: [PATCH 04/12] perf: Lower latency for cutting voice

---
 app/helpers/call_llm.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index 5cf08ea..40d3df3 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -184,9 +184,6 @@ async def _commit_answer(
 
             Start the chat task and wait for its response if needed. Job is stored in `last_response` shared variable.
             """
-            # Stop any previous response
-            await _stop_callback()
-
             # Start chat task
             nonlocal last_response
             last_response = await scheduler.spawn(
@@ -228,6 +225,9 @@ async def _response_callback(_retry: bool = False) -> None:
                 await asyncio.sleep(0.2)
                 return await _response_callback(_retry=True)
 
+            # Stop any previous response
+            await _stop_callback()
+
             # Add it to the call history and update last interaction
             logger.info("Voice stored: %s", stt_buffer)
             async with _db.call_transac(
@@ -242,9 +242,6 @@ async def _response_callback(_retry: bool = False) -> None:
                     )
                 )
 
-            # Clear the recognition buffer
-            stt_buffer.clear()
-
             # Process the response
             await _commit_answer(wait=True)
 

From f474234f75a743de28ab8b478705439eb6696929 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:18:40 +0100
Subject: [PATCH 05/12] dev: Enhance logging

---
 app/helpers/call_llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index 40d3df3..ad74678 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -229,7 +229,7 @@ async def _response_callback(_retry: bool = False) -> None:
             await _stop_callback()
 
             # Add it to the call history and update last interaction
-            logger.info("Voice stored: %s", stt_buffer)
+            logger.info("Voice stored: %s", stt_text)
             async with _db.call_transac(
                 call=call,
                 scheduler=scheduler,

From fc6ac38c946b60c0261978a7ae53426e3214204d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:18:57 +0100
Subject: [PATCH 06/12] dev: Display log when voice is cut

---
 app/helpers/call_llm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index ad74678..147ae43 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -729,9 +729,8 @@ async def _wait_for_stop() -> None:
         # Wait before clearing the TTS queue
         await asyncio.sleep(timeout_ms / 1000)
 
-        logger.debug("Canceling TTS after %i ms", timeout_ms)
-
         # Clear the queue
+        logger.info("Stoping TTS after %i ms", timeout_ms)
         await stop_callback()
 
     while True:

From df6590baf2c9b1d5994e0a6e19c1a34b74e4d784 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 17:19:12 +0100
Subject: [PATCH 07/12] dev: Display debug log when audio is stopped

---
 app/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/main.py b/app/main.py
index 0ffdaf4..18d7e94 100644
--- a/app/main.py
+++ b/app/main.py
@@ -625,6 +625,7 @@ async def _send_audio() -> None:
                     )
                 # Stop audio
                 elif audio_data is False:
+                    logger.debug("Stop audio event received, stopping audio")
                     await websocket.send_json(
                         {
                             "kind": "StopAudio",

From 2b893169c29f6b0443d8a72f9be50627b3dfbf9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 18:36:45 +0100
Subject: [PATCH 08/12] ux: Fine-tune voice cut delay

---
 README.md               | 4 ++--
 app/helpers/features.py | 4 ++--
 cicd/bicep/app.bicep    | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index dfd44d7..38e14a8 100644
--- a/README.md
+++ b/README.md
@@ -498,8 +498,8 @@ Conversation options are represented as features. They can be configured from Ap
 | `recognition_retry_max` | The maximum number of retries for voice recognition. | `int` | 2 |
 | `recording_enabled` | Whether call recording is enabled. | `bool` | false |
 | `slow_llm_for_chat` | Whether to use the slow LLM for chat. | `bool` | false |
-| `vad_cutoff_timeout_ms` | The cutoff timeout for voice activity detection in secs. | `int` | 600 |
-| `vad_silence_timeout_ms` | The timeout for phone silence in secs. | `int` | 400 |
+| `vad_cutoff_timeout_ms` | The cutoff timeout for voice activity detection in milliseconds. | `int` | 300 |
+| `vad_silence_timeout_ms` | The timeout for phone silence in milliseconds. | `int` | 500 |
 | `vad_threshold` | The threshold for voice activity detection. | `float` | 0.5 |
 
 ### Use an OpenAI compatible model for the LLM
diff --git a/app/helpers/features.py b/app/helpers/features.py
index 392f773..2df8fa1 100644
--- a/app/helpers/features.py
+++ b/app/helpers/features.py
@@ -59,7 +59,7 @@ async def vad_threshold() -> float:
 
 async def vad_silence_timeout_ms() -> int:
     return await _default(
-        default=400,
+        default=500,
         key="vad_silence_timeout_ms",
         type_res=int,
     )
@@ -67,7 +67,7 @@ async def vad_silence_timeout_ms() -> int:
 
 async def vad_cutoff_timeout_ms() -> int:
     return await _default(
-        default=600,
+        default=300,
         key="vad_cutoff_timeout_ms",
         type_res=int,
     )
diff --git a/cicd/bicep/app.bicep b/cicd/bicep/app.bicep
index 16e60eb..f7ffafe 100644
--- a/cicd/bicep/app.bicep
+++ b/cicd/bicep/app.bicep
@@ -907,8 +907,8 @@ resource configValues 'Microsoft.AppConfiguration/configurationStores/keyValues@
     recognition_retry_max: 2
     recording_enabled: false
     slow_llm_for_chat: false
-    vad_cutoff_timeout_ms: 600
-    vad_silence_timeout_ms: 400
+    vad_cutoff_timeout_ms: 300
+    vad_silence_timeout_ms: 500
     vad_threshold: '0.5'
   }): {
     parent: configStore

From 843fe28efbf95634589e67a46e73581c40c4b3ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 18:37:18 +0100
Subject: [PATCH 09/12] dev: Add log for recognition timeout

---
 app/helpers/call_llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/helpers/call_llm.py b/app/helpers/call_llm.py
index 147ae43..00ae850 100644
--- a/app/helpers/call_llm.py
+++ b/app/helpers/call_llm.py
@@ -212,7 +212,7 @@ async def _response_callback(_retry: bool = False) -> None:
             try:
                 await asyncio.wait_for(stt_complete_gate.wait(), timeout=0.05)
             except TimeoutError:
-                pass
+                logger.debug("Complete recognition timeout, using partial recognition")
 
             stt_text = " ".join(stt_buffer).strip()
 

From f2281b965d773ac5f4564081740049bfe47c9842 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 18:37:33 +0100
Subject: [PATCH 10/12] quality: Code quality

---
 app/helpers/call_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/helpers/call_utils.py b/app/helpers/call_utils.py
index 8d12c20..f09627b 100644
--- a/app/helpers/call_utils.py
+++ b/app/helpers/call_utils.py
@@ -727,12 +727,12 @@ async def _process_one(self, input_pcm: bytes) -> None:
         # Apply noise reduction
         reduced_signal = reduce_noise(
             # Input signal
-            clip_noise_stationary=False,
             sr=self._sample_rate,
             y=input_signal,
             # Performance
             n_fft=self._chunk_size,
             # Since the reference signal is already noise-reduced, we can assume it's stationary
+            clip_noise_stationary=False,  # Noise is longer than the signal
             stationary=True,
             y_noise=self._bot_voice_buffer,
             # Output quality

From 37de648f58c5e28149f2588a1a83d7afc0835b19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 18:38:03 +0100
Subject: [PATCH 11/12] perf: Limit echo in the final audio

100% of background removal seems fine.
---
 app/helpers/call_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/app/helpers/call_utils.py b/app/helpers/call_utils.py
index f09627b..499a18b 100644
--- a/app/helpers/call_utils.py
+++ b/app/helpers/call_utils.py
@@ -735,8 +735,6 @@ async def _process_one(self, input_pcm: bytes) -> None:
             clip_noise_stationary=False,  # Noise is longer than the signal
             stationary=True,
             y_noise=self._bot_voice_buffer,
-            # Output quality
-            prop_decrease=0.75,  # Reduce noise by 75%
         )
 
         # Perform VAD test

From e3f72d56e6bdf0fe8a19d93f11da18ce585d2e70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Wed, 11 Dec 2024 18:38:19 +0100
Subject: [PATCH 12/12] perf: Enhance echo detection

---
 app/helpers/call_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/helpers/call_utils.py b/app/helpers/call_utils.py
index 499a18b..1076706 100644
--- a/app/helpers/call_utils.py
+++ b/app/helpers/call_utils.py
@@ -729,8 +729,8 @@ async def _process_one(self, input_pcm: bytes) -> None:
             # Input signal
             sr=self._sample_rate,
             y=input_signal,
-            # Performance
-            n_fft=self._chunk_size,
+            # Quality
+            n_fft=128,
             # Since the reference signal is already noise-reduced, we can assume it's stationary
             clip_noise_stationary=False,  # Noise is longer than the signal
             stationary=True,