diff --git a/videotrans/__init__.py b/videotrans/__init__.py
index 48d017de..cd4890e8 100644
--- a/videotrans/__init__.py
+++ b/videotrans/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
-VERSION="v1.89"
-VERSION_NUM=110789
\ No newline at end of file
+VERSION="v1.90"
+VERSION_NUM=110790
\ No newline at end of file
diff --git a/videotrans/configure/config.py b/videotrans/configure/config.py
index d7ae849b..4f2e8b69 100644
--- a/videotrans/configure/config.py
+++ b/videotrans/configure/config.py
@@ -69,7 +69,7 @@ def parse_init():
         "separate_sec":600,
         "audio_rate":1.5,
         "video_rate":20,
-        "initial_prompt_zh":"",
+        "initial_prompt_zh":"Please break sentences correctly and retain punctuation",
         "fontsize":16,
         "fontname":"黑体",
         "fontcolor":"&HFFFFFF",
@@ -82,6 +82,8 @@ def parse_init():
         "backaudio_volume":0.5,
         "overall_silence":2100,
         "overall_maxsecs":3,
+        "overall_threshold":0.5,
+        "overall_speech_pad_ms":100,
         "remove_srt_silence":False,
         "remove_silence":True,
         "remove_white_ms":100,
diff --git a/videotrans/mainwin/secwin.py b/videotrans/mainwin/secwin.py
index a0581dfd..19602421 100644
--- a/videotrans/mainwin/secwin.py
+++ b/videotrans/mainwin/secwin.py
@@ -555,7 +555,7 @@ def open_url(self, title):
         elif title == 'issue':
             webbrowser.open_new_tab("https://github.com/jianchang512/pyvideotrans/issues")
         elif title == 'discord':
-            webbrowser.open_new_tab("https://discord.gg/4WrxqBTSn8")
+            webbrowser.open_new_tab("https://discord.gg/y9gUweVCCJ")
         elif title == 'models':
             webbrowser.open_new_tab("https://github.com/jianchang512/stt/releases/tag/0.0")
         elif title == 'dll':
@@ -604,9 +604,6 @@ def open_url(self, title):
 本软件的所有解释权均属于开发者。谨请用户在理解、同意、遵守本免责声明的前提下使用本软件。
""")
-        elif title == 'aihelp':
-            webbrowser.open_new_tab("https://www.coze.cn/store/bot/7358853334134112296?panel=1")
-
     # 工具箱
     def open_toolbox(self, index=0, is_hide=True):
         try:
diff --git a/videotrans/mainwin/spwin.py b/videotrans/mainwin/spwin.py
index c1d30172..195b8ff6 100644
--- a/videotrans/mainwin/spwin.py
+++ b/videotrans/mainwin/spwin.py
@@ -306,7 +306,6 @@ def bind_action(self):
         self.action_cuda.triggered.connect(lambda: self.util.open_url('cuda'))
         self.action_online.triggered.connect(lambda: self.util.open_url('online'))
         self.action_website.triggered.connect(lambda: self.util.open_url('website'))
-        self.action_aihelp.triggered.connect(lambda: self.util.open_url('aihelp'))
         self.action_blog.triggered.connect(lambda: self.util.open_url('blog'))
         self.statusLabel.clicked.connect(lambda: self.util.open_url('help'))
         self.action_issue.triggered.connect(lambda: self.util.open_url('issue'))
diff --git a/videotrans/recognition/all.py b/videotrans/recognition/all.py
index 790b3426..09635f26 100644
--- a/videotrans/recognition/all.py
+++ b/videotrans/recognition/all.py
@@ -59,12 +59,13 @@ def recogn(*,
             vad_filter=bool(config.settings['vad']),
             vad_parameters=dict(
                 min_silence_duration_ms=config.settings['overall_silence'],
-                max_speech_duration_s=config.settings['overall_maxsecs']
+                max_speech_duration_s=config.settings['overall_maxsecs'],
+                threshold=config.settings['overall_threshold'],
+                speech_pad_ms=config.settings['overall_speech_pad_ms']
             ),
             word_timestamps=True,
             language=detect_language,
-            initial_prompt=None if detect_language != 'zh' else config.settings[
-                'initial_prompt_zh'])
+            initial_prompt=config.settings['initial_prompt_zh'])

     # 保留原始语言的字幕
     raw_subtitles = []
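A note on the two new VAD keys threaded through recognition/all.py above: they map directly onto faster-whisper's Silero VAD options. `threshold` is the speech-probability cutoff above which a frame counts as speech, and `speech_pad_ms` pads each detected speech chunk so word onsets are not clipped. A minimal standalone sketch of the resulting call, with a placeholder audio path and model size (the values mirror the new set.ini defaults):

```python
from faster_whisper import WhisperModel

model = WhisperModel("tiny", device="cpu", compute_type="int8")  # placeholder model/device
segments, info = model.transcribe(
    "audio.wav",                       # placeholder input file
    vad_filter=True,                   # set.ini: vad=true
    vad_parameters=dict(
        min_silence_duration_ms=250,   # set.ini: overall_silence
        max_speech_duration_s=6,       # set.ini: overall_maxsecs
        threshold=0.5,                 # set.ini: overall_threshold (new)
        speech_pad_ms=100,             # set.ini: overall_speech_pad_ms (new)
    ),
    word_timestamps=True,
)
for seg in segments:
    print(f"[{seg.start:.2f} -> {seg.end:.2f}] {seg.text}")
```

Raising `threshold` drops quiet or noisy frames; raising `speech_pad_ms` trades tighter timestamps for fewer clipped word onsets.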
diff --git a/videotrans/recognition/avg.py b/videotrans/recognition/avg.py
index 3f23141e..1d6f5f63 100644
--- a/videotrans/recognition/avg.py
+++ b/videotrans/recognition/avg.py
@@ -106,11 +106,19 @@ def recogn(*,
         text = ""
         try:
             segments, _ = model.transcribe(chunk_filename,
-                beam_size=5,
-                best_of=5,
-                condition_on_previous_text=True,
+                beam_size=config.settings['beam_size'],
+                best_of=config.settings['best_of'],
+                condition_on_previous_text=config.settings['condition_on_previous_text'],
+                temperature=0 if config.settings['temperature'] == 0 else [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
+                vad_filter=bool(config.settings['vad']),
+                vad_parameters=dict(
+                    min_silence_duration_ms=config.settings['overall_silence'],
+                    max_speech_duration_s=config.settings['overall_maxsecs'],
+                    threshold=config.settings['overall_threshold'],
+                    speech_pad_ms=config.settings['overall_speech_pad_ms']
+                ),
                 language=detect_language,
-                initial_prompt=None if detect_language != 'zh' else config.settings['initial_prompt_zh'], )
+                initial_prompt=config.settings['initial_prompt_zh'], )
             for t in segments:
                 text += t.text + " "
         except Exception as e:
diff --git a/videotrans/recognition/openai.py b/videotrans/recognition/openai.py
index 7d7a4a4b..1c29b9ac 100644
--- a/videotrans/recognition/openai.py
+++ b/videotrans/recognition/openai.py
@@ -91,7 +91,7 @@ def recogn(*,
         try:
             tr = model.transcribe(chunk_filename,
                 language=detect_language,
-                initial_prompt=None if detect_language != 'zh' else config.settings['initial_prompt_zh'],
+                initial_prompt=config.settings['initial_prompt_zh'],
                 condition_on_previous_text=config.settings['condition_on_previous_text']
             )
             for t in tr['segments']:
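The `temperature` rewrite in recognition/avg.py follows Whisper's standard fallback convention: a scalar `0` forces purely deterministic greedy/beam decoding, while a list makes the decoder retry at progressively higher temperatures only when a segment fails its compression-ratio or log-probability checks. A tiny sketch of the switch in isolation, assuming a settings dict shaped like the project's `config.settings`:

```python
settings = {"temperature": 0}  # the new set.ini default

temperature = (
    0
    if settings["temperature"] == 0
    else [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]  # fallback ladder, tried in order on failure
)
print(temperature)  # -> 0: deterministic decoding, no retries
```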
".replace(''', "'") text = re.sub(r'&#\d+;', '', text).strip().strip('.') - if detect_language == 'zh' and text == config.settings['initial_prompt_zh']: + if text == config.settings['initial_prompt_zh']: continue if not text or re.match(r'^[,。、?‘’“”;:({}【】):;"\'\s \d`!@#$%^&*()_+=.,?/\\-]*$', text): continue diff --git a/videotrans/set.ini b/videotrans/set.ini index 792f6387..44b9d6ab 100644 --- a/videotrans/set.ini +++ b/videotrans/set.ini @@ -56,17 +56,24 @@ force_edit_srt=true ; ###############语句分割相关################################## ; statement segmentation related ################################## -;用于 预先分割 和 整体识别 时,作为切割依据的最小静音片段ms,默认200ms 以及最大句子时长3s +;faster-whisper字幕整体识别模式时启用自定义静音分割片段,true=启用,显存不足时,可以设为false禁用 +;Enable custom mute segmentation when subtitles are in overall recognition mode, true=enable, can be set to false to disable when video memory is insufficient. +vad=true + +;用于 faster-whisper 时 VAD选项设置作为切割依据的最小静音片段ms,默认250ms 以及最大句子时长6s ;The minimum silent segmentation ms, default 200ms, and the maximum sentence length 3s are used for pre-segmentation and overall recognition as the basis for segmentation. -overall_silence=200 -overall_maxsecs=3 +overall_silence=250 +overall_maxsecs=6 +overall_threshold=0.5 +overall_speech_pad_ms=100 + -;用于均等分割时,作为切割依据的最小静音片段ms,默认200ms,即只有大于等于200ms的静音处才分割 +;用于均等分割时和openai模式时,作为切割依据的最小静音片段ms,默认200ms,即只有大于等于200ms的静音处才分割 ; used for equal segmentation, as the basis for cutting the minimum silence segment ms, the default 200ms, that is, only greater than or equal to 200ms silence at the split -voice_silence=200 -;用于均等分割时的每个切片时长 秒,默认 6s,即每个字幕时长大约都是6s +voice_silence=250 +;用于均等分割时的每个切片时长 秒,默认 10s,即每个字幕时长大约都是10s ;seconds per slice for equalization, default 6s, i.e. each subtitle is about 6s. -interval_split=6 +interval_split=10 ;################翻译配音速度############################# @@ -114,9 +121,9 @@ loop_backaudio=false ; cuda data type when recognizing subtitles from video, int8=consumes fewer resources, faster, lower precision, float32=consumes more resources, slower, higher precision, int8_float16=device of choice cuda_com_type=float32 -;中文语言的视频时,用于识别的提示词,可解决简体识别为繁体问题。但注意,有可能直接会将提示词作为识别结果返回 -;The prompt words used to recognize videos in Chinese language can solve the problem of recognizing simplified Chinese as traditional Chinese. But note that there is a possibility that the prompt word will be returned directly as the result of the recognition. -initial_prompt_zh= +;发送给whisper模型的提示词 +;Cue words sent to the whisper model. +initial_prompt_zh=Please break sentences correctly and retain punctuation. ;字幕识别时,cpu进程 ;cpu process during subtitle recognition @@ -131,13 +138,11 @@ whisper_worker=1 beam_size=5 best_of=5 -;faster-whisper字幕整体识别模式时启用自定义静音分割片段,true=启用,显存不足时,可以设为false禁用 -;Enable custom mute segmentation when subtitles are in overall recognition mode, true=enable, can be set to false to disable when video memory is insufficient. 
diff --git a/videotrans/set.ini b/videotrans/set.ini
index 792f6387..44b9d6ab 100644
--- a/videotrans/set.ini
+++ b/videotrans/set.ini
@@ -56,17 +56,24 @@ force_edit_srt=true

 ; ###############语句分割相关##################################
 ; statement segmentation related ##################################
-;用于 预先分割 和 整体识别 时,作为切割依据的最小静音片段ms,默认200ms 以及最大句子时长3s
+;faster-whisper字幕整体识别模式时启用自定义静音分割片段,true=启用,显存不足时,可以设为false禁用
+;Enable custom mute segmentation when subtitles are in overall recognition mode, true=enable, can be set to false to disable when video memory is insufficient.
+vad=true
+
+;用于 faster-whisper 时 VAD选项设置作为切割依据的最小静音片段ms,默认250ms 以及最大句子时长6s
 ;The minimum silent segmentation ms, default 200ms, and the maximum sentence length 3s are used for pre-segmentation and overall recognition as the basis for segmentation.
-overall_silence=200
-overall_maxsecs=3
+overall_silence=250
+overall_maxsecs=6
+overall_threshold=0.5
+overall_speech_pad_ms=100
+


-;用于均等分割时,作为切割依据的最小静音片段ms,默认200ms,即只有大于等于200ms的静音处才分割
+;用于均等分割时和openai模式时,作为切割依据的最小静音片段ms,默认200ms,即只有大于等于200ms的静音处才分割
 ; used for equal segmentation, as the basis for cutting the minimum silence segment ms, the default 200ms, that is, only greater than or equal to 200ms silence at the split
-voice_silence=200
-;用于均等分割时的每个切片时长 秒,默认 6s,即每个字幕时长大约都是6s
+voice_silence=250
+;用于均等分割时的每个切片时长 秒,默认 10s,即每个字幕时长大约都是10s
 ;seconds per slice for equalization, default 6s, i.e. each subtitle is about 6s.
-interval_split=6
+interval_split=10


 ;################翻译配音速度#############################
@@ -114,9 +121,9 @@ loop_backaudio=false
 ; cuda data type when recognizing subtitles from video, int8=consumes fewer resources, faster, lower precision, float32=consumes more resources, slower, higher precision, int8_float16=device of choice
 cuda_com_type=float32

-;中文语言的视频时,用于识别的提示词,可解决简体识别为繁体问题。但注意,有可能直接会将提示词作为识别结果返回
-;The prompt words used to recognize videos in Chinese language can solve the problem of recognizing simplified Chinese as traditional Chinese. But note that there is a possibility that the prompt word will be returned directly as the result of the recognition.
-initial_prompt_zh=
+;发送给whisper模型的提示词
+;Cue words sent to the whisper model.
+initial_prompt_zh=Please break sentences correctly and retain punctuation.

 ;字幕识别时,cpu进程
 ;cpu process during subtitle recognition
@@ -131,13 +138,11 @@ whisper_worker=1
 beam_size=5
 best_of=5

-;faster-whisper字幕整体识别模式时启用自定义静音分割片段,true=启用,显存不足时,可以设为false禁用
-;Enable custom mute segmentation when subtitles are in overall recognition mode, true=enable, can be set to false to disable when video memory is insufficient.
-vad=true
+

 ;0=占用更少GPU资源但效果略差,1=占用更多GPU资源同时效果更好
 ;0 = less GPU resources but slightly worse results, 1 = more GPU resources and better results at the same time
-temperature=1
+temperature=0

 ;同 temperature, true=占用更多GPU效果更好,false=占用更少GPU效果略差
 ; same as temperature, true=better with more GPUs, false=slightly worse with fewer GPUs
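For reference, here is how the new keys would be read back out of set.ini. The project's real parsing lives in configure/config.py; this standalone reader is only a sketch of the expected shapes (booleans arrive as the strings `true`/`false`, the VAD numbers as ints and floats):

```python
from pathlib import Path

def read_set_ini(path: str) -> dict:
    """Naive key=value reader; skips blank lines and ;-comments."""
    settings = {}
    for raw in Path(path).read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith(";") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        settings[key.strip()] = value.strip()
    return settings

cfg = read_set_ini("videotrans/set.ini")
vad_enabled = cfg.get("vad", "true").lower() == "true"
vad_params = dict(
    min_silence_duration_ms=int(cfg.get("overall_silence", 250)),
    max_speech_duration_s=int(cfg.get("overall_maxsecs", 6)),
    threshold=float(cfg.get("overall_threshold", 0.5)),
    speech_pad_ms=int(cfg.get("overall_speech_pad_ms", 100)),
)
```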
diff --git a/videotrans/ui/en.py b/videotrans/ui/en.py
index 362ca498..3ef229cc 100644
--- a/videotrans/ui/en.py
+++ b/videotrans/ui/en.py
@@ -103,42 +103,8 @@ def setupUi(self, MainWindow):
         self.translate_type.setObjectName("translate_type")
         self.layout_translate_type.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.translate_type)
         self.horizontalLayout_5.addLayout(self.layout_translate_type)
-
-
-
-        self.horizontalLayout_5.addStretch()
-        self.layout_proxy = QtWidgets.QFormLayout()
-        self.layout_proxy.setObjectName("layout_proxy")
-        self.label = QtWidgets.QLabel(self.layoutWidget)
-        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
-        sizePolicy.setHorizontalStretch(0)
-        sizePolicy.setVerticalStretch(0)
-        sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
-        self.label.setSizePolicy(sizePolicy)
-        self.label.setMinimumSize(QtCore.QSize(0, 30))
-        self.label.setObjectName("label")
-        self.layout_proxy.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label)
-        self.proxy = QtWidgets.QLineEdit(self.layoutWidget)
-        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
-        sizePolicy.setHorizontalStretch(0)
-        sizePolicy.setVerticalStretch(0)
-        sizePolicy.setHeightForWidth(self.proxy.sizePolicy().hasHeightForWidth())
-        self.proxy.setSizePolicy(sizePolicy)
-        self.proxy.setMaximumSize(QtCore.QSize(220, 30))
-        self.proxy.setObjectName("proxy")
-        self.layout_proxy.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.proxy)
-        self.horizontalLayout_5.addLayout(self.layout_proxy)
-
-        self.listen_btn = QtWidgets.QPushButton(self.layoutWidget)
-        self.listen_btn.setEnabled(False)
-        self.listen_btn.setFixedWidth(80)
-
-
-        self.verticalLayout_2.addLayout(self.horizontalLayout_5)
-        self.horizontalLayout = QtWidgets.QHBoxLayout()
-        self.horizontalLayout.setSizeConstraint(QtWidgets.QLayout.SetMinimumSize)
-        self.horizontalLayout.setObjectName("horizontalLayout")
+        # 原始语言 目标语言 start
         self.layout_source_language = QtWidgets.QFormLayout()
         self.layout_source_language.setFormAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter)
         self.layout_source_language.setObjectName("layout_source_language")
@@ -160,7 +126,11 @@ def setupUi(self, MainWindow):
         self.source_language.setMinimumSize(QtCore.QSize(0, 30))
         self.source_language.setObjectName("source_language")
         self.layout_source_language.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.source_language)
-        self.horizontalLayout.addLayout(self.layout_source_language)
+
+
+
+
+
         self.layout_target_language = QtWidgets.QFormLayout()
         self.layout_target_language.setFormAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter)
         self.layout_target_language.setObjectName("layout_target_language")
@@ -180,9 +150,59 @@ def setupUi(self, MainWindow):
         sizePolicy.setHeightForWidth(self.target_language.sizePolicy().hasHeightForWidth())
         self.target_language.setSizePolicy(sizePolicy)
         self.target_language.setMinimumSize(QtCore.QSize(0, 30))
-        self.target_language.setObjectName("target_language")
+        self.target_language.setObjectName("target_language")
         self.layout_target_language.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.target_language)
-        self.horizontalLayout.addLayout(self.layout_target_language)
+
+
+        self.horizontalLayout_5.addLayout(self.layout_source_language)
+        self.horizontalLayout_5.addLayout(self.layout_target_language)
+        # 原始语言 目标语言 end
+
+
+
+        #self.horizontalLayout_5.addStretch()
+        self.layout_proxy = QtWidgets.QFormLayout()
+        self.layout_proxy.setObjectName("layout_proxy")
+
+        self.label = QtWidgets.QLabel(self.layoutWidget)
+        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
+        sizePolicy.setHorizontalStretch(0)
+        sizePolicy.setVerticalStretch(0)
+        sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
+        self.label.setSizePolicy(sizePolicy)
+        self.label.setMinimumSize(QtCore.QSize(0, 30))
+        self.label.setObjectName("label")
+        self.layout_proxy.setWidget(0, QtWidgets.QFormLayout.LabelRole, self.label)
+        self.proxy = QtWidgets.QLineEdit(self.layoutWidget)
+        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
+        sizePolicy.setHorizontalStretch(0)
+        sizePolicy.setVerticalStretch(0)
+        sizePolicy.setHeightForWidth(self.proxy.sizePolicy().hasHeightForWidth())
+        self.proxy.setSizePolicy(sizePolicy)
+        self.proxy.setMaximumSize(QtCore.QSize(220, 30))
+        self.proxy.setObjectName("proxy")
+        self.layout_proxy.setWidget(0, QtWidgets.QFormLayout.FieldRole, self.proxy)
+        self.horizontalLayout_5.addLayout(self.layout_proxy)
+
+        self.listen_btn = QtWidgets.QPushButton(self.layoutWidget)
+        self.listen_btn.setEnabled(False)
+        self.listen_btn.setFixedWidth(80)
+
+
+        self.verticalLayout_2.addLayout(self.horizontalLayout_5)
+        self.horizontalLayout = QtWidgets.QHBoxLayout()
+        self.horizontalLayout.setSizeConstraint(QtWidgets.QLayout.SetMinimumSize)
+        self.horizontalLayout.setObjectName("horizontalLayout")
+
+
+
+
+
+        #self.horizontalLayout.addLayout(self.layout_source_language)
+        #self.horizontalLayout.addLayout(self.layout_target_language)
+
+
         self.layout_tts_type = QtWidgets.QFormLayout()
         self.layout_tts_type.setFormAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter)
         self.layout_tts_type.setObjectName("layout_tts_type")
@@ -290,7 +310,7 @@ def setupUi(self, MainWindow):
         sizePolicy.setVerticalStretch(0)
         sizePolicy.setHeightForWidth(self.whisper_model.sizePolicy().hasHeightForWidth())
         self.whisper_model.setSizePolicy(sizePolicy)
-        self.whisper_model.setMinimumSize(QtCore.QSize(0, 30))
+        self.whisper_model.setMinimumSize(QtCore.QSize(180, 30))
         self.whisper_model.setObjectName("whisper_model")
         self.layout_whisper_model.addWidget(self.whisper_model, 0, 1, 1, 1)
         self.whisper_type = QtWidgets.QComboBox(self.layoutWidget)
@@ -299,7 +319,7 @@ def setupUi(self, MainWindow):
         sizePolicy.setVerticalStretch(0)
         sizePolicy.setHeightForWidth(self.whisper_type.sizePolicy().hasHeightForWidth())
         self.whisper_type.setSizePolicy(sizePolicy)
-        self.whisper_type.setMinimumSize(QtCore.QSize(0, 30))
+        self.whisper_type.setMinimumSize(QtCore.QSize(100, 30))
         self.whisper_type.setObjectName("whisper_type")
         self.layout_whisper_model.addWidget(self.whisper_type, 0, 2, 1, 1)
         self.horizontalLayout_4.addLayout(self.layout_whisper_model)
@@ -571,8 +591,6 @@ def setupUi(self, MainWindow):
         self.action_website = QtGui.QAction(MainWindow)
         self.action_website.setObjectName("action_website")
-        self.action_aihelp = QtGui.QAction(MainWindow)
-        self.action_aihelp.setObjectName("self.action_aihelp")
         self.action_blog = QtGui.QAction(MainWindow)
         self.action_blog.setObjectName("action_blog")
         self.action_discord = QtGui.QAction(MainWindow)
@@ -706,8 +724,6 @@ def setupUi(self, MainWindow):
         self.menu_H.addSeparator()
         self.menu_H.addAction(self.action_website)
         self.menu_H.addSeparator()
-        self.menu_H.addAction(self.action_aihelp)
-        self.menu_H.addSeparator()
         self.menu_H.addAction(self.action_blog)
         self.menu_H.addSeparator()
@@ -768,7 +784,7 @@ def retranslateUi(self):
         self.source_language.setToolTip(config.uilanglist.get("The language used for the original video pronunciation"))
         self.label_3.setText(config.uilanglist.get("Target lang"))
         self.target_language.setToolTip(config.uilanglist.get("What language do you want to translate into"))
-        self.tts_text.setText("TTS")
+        self.tts_text.setText("配音渠道" if config.defaulelang=='zh' else "TTS")
         self.label_4.setText(config.uilanglist.get("Dubbing role"))
         self.voice_role.setToolTip(config.uilanglist.get("No is not dubbing"))
@@ -821,7 +837,6 @@ def retranslateUi(self):
         self.actionzhrecogn_api.setText("zh_recogn中文语音识别" if config.defaulelang=='zh' else "zh_recogn only Chinese")
         self.actiontts_gptsovits.setText("GPT-SoVITS TTS")
         self.action_website.setText(config.uilanglist.get("Documents"))
-        self.action_aihelp.setText('AI文档问答助手')
         self.action_discord.setText("Discord")
         self.action_blog.setText("博客教程" if config.defaulelang=='zh' else 'My Blog')
         self.action_models.setText(config.uilanglist["Download Models"])
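The ui/en.py reshuffle moves the source/target language pickers out of the second row (`horizontalLayout`) and into the first row (`horizontalLayout_5`), next to the translation channel; the old `addLayout` calls survive only as comments. The mechanism is plain Qt layout ownership: a sub-layout belongs to whichever row layout last called `addLayout()` on it. A toy sketch of the same move (not the project's real widget tree):

```python
from PySide6.QtWidgets import (QApplication, QComboBox, QFormLayout,
                               QHBoxLayout, QVBoxLayout, QWidget)

app = QApplication([])
root = QWidget()
column = QVBoxLayout(root)
row1, row2 = QHBoxLayout(), QHBoxLayout()   # like horizontalLayout_5 / horizontalLayout
column.addLayout(row1)
column.addLayout(row2)

source_form = QFormLayout()
source_form.addRow("Source lang", QComboBox())
row1.addLayout(source_form)   # before the change this was row2.addLayout(source_form)

root.show()
app.exec()
```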
diff --git a/videotrans/util/tools.py b/videotrans/util/tools.py
index 8c5d838d..a63a7298 100644
--- a/videotrans/util/tools.py
+++ b/videotrans/util/tools.py
@@ -454,7 +454,7 @@ def conver_to_8k(audio, target_audio):
         "-ac",
         "1",
         "-ar",
-        "8000",
+        "16000",
         Path(target_audio).as_posix(),
     ])
@@ -510,7 +510,7 @@ def m4a2wav(m4afile, wavfile):
         "-ac",
         "1",
         "-ar",
-        "8000",
+        "16000",
         "-b:a",
         "128k",
         "-c:a",
@@ -882,7 +882,7 @@ def cut_from_audio(*, ss, to, audio_file, out_file):
         "-to",
         format_time(to, '.'),
         "-ar",
-        "8000",
+        "16000",
         out_file
     ]
     return runffmpeg(cmd)
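Finally, the util/tools.py changes bump every helper resample from 8 kHz to 16 kHz. 16 kHz mono is the native input rate of the Whisper model family, so audio that passes through these helpers no longer has to be upsampled lossily before recognition. The equivalent ffmpeg call in isolation (paths are placeholders):

```python
import subprocess

def convert_to_16k_mono(src: str, dst: str) -> None:
    """Resample any input to 16 kHz mono, as tools.py now does."""
    subprocess.run([
        "ffmpeg", "-hide_banner", "-y",
        "-i", src,
        "-ac", "1",       # mono
        "-ar", "16000",   # was "8000" before this patch
        dst,
    ], check=True)

convert_to_16k_mono("input.m4a", "output.wav")
```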