Fix voice selection (#2664)

Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <[email protected]> Co-authored-by: crazywoola <[email protected]>
langgenius · Mar 4, 2024 · commit 6a6133c
1 parent: 3c18251
Show file tree

Hide file tree

Showing 9 changed files with 14 additions and 7 deletions.
diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py
@@ -88,7 +88,7 @@ def post(self, app_id):
             response = AudioService.transcript_tts(
                 tenant_id=app_model.tenant_id,
                 text=request.form['text'],
-                voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
+                voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
                 streaming=False
             )
 

diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py
@@ -85,7 +85,7 @@ def post(self, installed_app):
             response = AudioService.transcript_tts(
                 tenant_id=app_model.tenant_id,
                 text=request.form['text'],
-                voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
+                voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
                 streaming=False
             )
             return {'data': response.data.decode('latin1')}

diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py
@@ -87,7 +87,7 @@ def post(self, app_model: App, end_user: EndUser):
                 tenant_id=app_model.tenant_id,
                 text=args['text'],
                 end_user=end_user,
-                voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
+                voice=args['voice'] if args['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
                 streaming=args['streaming']
             )
 

diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py
@@ -84,7 +84,7 @@ def post(self, app_model: App, end_user):
                 tenant_id=app_model.tenant_id,
                 text=request.form['text'],
                 end_user=end_user.external_user_id,
-                voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
+                voice=request.form['voice'] if request.form['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
                 streaming=False
             )
 

diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py
@@ -34,7 +34,7 @@ def _invoke(self, model: str, tenant_id: str, credentials: dict,
         :return: text translated to audio file
         """
         audio_type = self._get_model_audio_type(model, credentials)
-        if not voice:
+        if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
             voice = self._get_model_default_voice(model, credentials)
         if streaming:
             return Response(stream_with_context(self._tts_invoke_streaming(model=model,

diff --git a/api/core/model_runtime/model_providers/tongyi/tts/tts.py b/api/core/model_runtime/model_providers/tongyi/tts/tts.py
@@ -34,7 +34,7 @@ def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: s
         :return: text translated to audio file
         """
         audio_type = self._get_model_audio_type(model, credentials)
-        if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
+        if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
             voice = self._get_model_default_voice(model, credentials)
         if streaming:
             return Response(stream_with_context(self._tts_invoke_streaming(model=model,

diff --git a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
@@ -40,6 +40,7 @@ const TextToSpeech: FC = () => {
           { languageInfo?.example && (
             <AudioBtn
               value={languageInfo?.example}
+              voice={voiceItem?.value}
               isAudition={true}
             />
           )}

diff --git a/web/app/components/base/audio-btn/index.tsx b/web/app/components/base/audio-btn/index.tsx
@@ -9,12 +9,14 @@ import { textToAudio } from '@/service/share'
 
 type AudioBtnProps = {
   value: string
+  voice?: string
   className?: string
   isAudition?: boolean
 }
 
 const AudioBtn = ({
   value,
+  voice,
   className,
   isAudition,
 }: AudioBtnProps) => {
@@ -27,13 +29,16 @@ const AudioBtn = ({
   const pathname = usePathname()
   const removeCodeBlocks = (inputText: any) => {
     const codeBlockRegex = /```[\s\S]*?```/g
-    return inputText.replace(codeBlockRegex, '')
+    if (inputText)
+      return inputText.replace(codeBlockRegex, '')
+    return ''
   }
 
   const playAudio = async () => {
     const formData = new FormData()
     if (value !== '') {
       formData.append('text', removeCodeBlocks(value))
+      formData.append('voice', removeCodeBlocks(voice))
 
       let url = ''
       let isPublic = false

diff --git a/web/app/components/base/chat/chat/answer/operation.tsx b/web/app/components/base/chat/chat/answer/operation.tsx
@@ -77,6 +77,7 @@ const Operation: FC<OperationProps> = ({
       {(!isOpeningStatement && config?.text_to_speech?.enabled) && (
         <AudioBtn
           value={content}
+          voice={config?.text_to_speech?.voice}
           className='hidden group-hover:block'
         />
       )}