Upgrade: pytorch to 2.5

jianchang512 · Nov 17, 2024 · 30b17f3 · 30b17f3
1 parent 6454128
commit 30b17f3
Show file tree

Hide file tree

Showing 12 changed files with 378 additions and 250 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -27,8 +27,8 @@ soundfile
 SpeechRecognition
 tencentcloud-sdk-python-common
 tencentcloud-sdk-python-tmt
-torch==2.2.0
-torchaudio==2.2.0
+torch
+torchaudio
 zhconv
 azure-cognitiveservices-speech
 pydantic==2.4.2

diff --git a/sp.py b/sp.py
@@ -1,5 +1,5 @@
-# -*- coding: utf-8 -*-
 # 代码是一坨屎，但又不是不能跑O(∩_∩)O~
+# 代码越写越是坨屎，好烦
 import multiprocessing
 import sys, os
 import time

diff --git a/videotrans/__init__.py b/videotrans/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 
-VERSION = "v3.14"
-VERSION_NUM = 120314
+VERSION = "v3.15"
+VERSION_NUM = 120315
diff --git a/videotrans/configure/config.py b/videotrans/configure/config.py
@@ -262,6 +262,7 @@ def parse_init():
 
         "crf": 23,
         "cuda_qp": False,
+        "cuda_decode":False,
         "preset": "fast",
         "ffmpeg_cmd": "",
         "aisendsrt":False,

diff --git a/videotrans/mainwin/_actions.py b/videotrans/mainwin/_actions.py
diff --git a/videotrans/mainwin/_main_win.py b/videotrans/mainwin/_main_win.py
@@ -177,7 +177,7 @@ def _set_cache_set(self):
         self.proxy.setText(config.proxy)
         self.continue_compos.setToolTip(config.transobj['Click to start the next step immediately'])
         self.split_type.addItems([config.transobj['whisper_type_all'], config.transobj['whisper_type_avg']])
-        self.export_sub.setText(config.transobj['Export srt'])
+        # self.export_sub.setText(config.transobj['Export srt'])
         self.subtitle_type.addItems(
             [
                 config.transobj['nosubtitle'],
@@ -237,10 +237,10 @@ def _set_cache_set(self):
         self.target_language.currentTextChanged.connect(self.win_action.set_voice_role)
         self.source_language.currentTextChanged.connect(self.win_action.source_language_change)
 
-        self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
+        # self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
         self.proxy.textChanged.connect(self.win_action.change_proxy)
         self.import_sub.clicked.connect(self.win_action.import_sub_fun)
-        self.export_sub.clicked.connect(self.win_action.export_sub_fun)
+        # self.export_sub.clicked.connect(self.win_action.export_sub_fun)
         self.startbtn.clicked.connect(self.win_action.check_start)
         self.btn_save_dir.clicked.connect(self.win_action.get_save_dir)
         self.btn_get_video.clicked.connect(self.win_action.get_mp4)
@@ -262,8 +262,8 @@ def _set_cache_set(self):
 
     def start_subform(self):
         self.import_sub.setCursor(Qt.PointingHandCursor)
-        self.export_sub.setCursor(Qt.PointingHandCursor)
-        self.set_line_role.setCursor(Qt.PointingHandCursor)
+        # self.export_sub.setCursor(Qt.PointingHandCursor)
+        # self.set_line_role.setCursor(Qt.PointingHandCursor)
         self.model_name_help.setCursor(Qt.PointingHandCursor)
         self.stop_djs.setCursor(Qt.PointingHandCursor)
         self.continue_compos.setCursor(Qt.PointingHandCursor)

diff --git a/videotrans/recognition/_average.py b/videotrans/recognition/_average.py
@@ -84,8 +84,6 @@ def _exec(self) -> Union[List[Dict], None]:
                     self.error = "没有识别到任何说话声" if config.defaulelang=='zh' else "No speech detected"
                 else:
                     self.raws = list(raws)
-                    #if self.detect_language=='auto' and self.inst and hasattr(self.inst,'set_source_language'):
-                    #    self.inst.set_source_language(detect['langcode'])
                 try:
                     if process.is_alive():
                         process.terminate()
@@ -94,7 +92,7 @@ def _exec(self) -> Union[List[Dict], None]:
         except (LookupError,ValueError,AttributeError,ArithmeticError) as e:
             raise
         except Exception as e:
-            raise Exception(f"faster-whisper进程崩溃，请尝试使用openai-whisper模式或查看解决方案 https://pyvideotrans.com/12.html   :{e}")
+            raise Exception(f"{e}")
         finally:
             config.model_process = None
             self.has_done = True

diff --git a/videotrans/recognition/_overall.py b/videotrans/recognition/_overall.py
@@ -123,7 +123,7 @@ def _exec(self):
         except (LookupError,ValueError,AttributeError,ArithmeticError) as e:
             self.error=str(e)
         except Exception as e:
-            self.error=f"faster-whisper进程崩溃，请尝试使用openai-whisper模式或查看解决方案 https://pyvideotrans.com/12.html   :{e}"
+            self.error=f"{e}"
         finally:
             config.model_process = None
             self.has_done = True

diff --git a/videotrans/translator/_base.py b/videotrans/translator/_base.py
@@ -385,21 +385,21 @@ def _set_cache(self, it, res_str):
             return
         key_cache = self._get_key(it)
 
-        file_cache = config.SYS_TMP + f'/translate_cache/{key_cache}.txt'
-        if not Path(config.SYS_TMP + f'/translate_cache').is_dir():
-            Path(config.SYS_TMP + f'/translate_cache').mkdir(parents=True, exist_ok=True)
+        file_cache = config.TEMP_DIR + f'/translate_cache/{key_cache}.txt'
+        if not Path(config.TEMP_DIR + f'/translate_cache').is_dir():
+            Path(config.TEMP_DIR + f'/translate_cache').mkdir(parents=True, exist_ok=True)
         Path(file_cache).write_text(res_str, encoding='utf-8')
 
     def _get_cache(self, it):
         if self.is_test:
             return None
         key_cache = self._get_key(it)
-        file_cache = config.SYS_TMP + f'/translate_cache/{key_cache}.txt'
+        file_cache = config.TEMP_DIR + f'/translate_cache/{key_cache}.txt'
         if Path(file_cache).exists():
             return Path(file_cache).read_text(encoding='utf-8')
         return None
 
     def _get_key(self, it):
-        Path(config.SYS_TMP + '/translate_cache').mkdir(parents=True, exist_ok=True)
+        Path(config.TEMP_DIR + '/translate_cache').mkdir(parents=True, exist_ok=True)
         return tools.get_md5(
             f'{self.__class__.__name__}-{self.model_name}-{self.source_code}-{self.target_code}-{it if isinstance(it, str) else json.dumps(it)}')
diff --git a/videotrans/ui/en.py b/videotrans/ui/en.py
@@ -25,8 +25,8 @@ def setupUi(self, MainWindow):
         self.splitter.setOrientation(QtCore.Qt.Horizontal)
         self.splitter.setObjectName("splitter")
         self.splitter.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred)
-        self.splitter.setMaximumWidth(700)
-        self.splitter.setMinimumWidth(380)
+        # self.splitter.setMaximumWidth(700)
+        self.splitter.setMinimumWidth(500)
 
         self.layoutWidget = QtWidgets.QWidget(self.splitter)
         self.layoutWidget.setObjectName("layoutWidget")
@@ -103,7 +103,7 @@ def setupUi(self, MainWindow):
         self.horizontalLayout_5.addWidget(self.label_9)
         self.horizontalLayout_5.addWidget(self.translate_type)
 
-        # 原始语言 目标语言 start       
+        # 原始语言 目标语言 start
         self.label_2 = QtWidgets.QPushButton(self.layoutWidget)
         self.label_2.setMinimumSize(QtCore.QSize(0, 30))
         self.label_2.setStyleSheet("""background-color:transparent""")
@@ -565,44 +565,49 @@ def setupUi(self, MainWindow):
         self.verticalLayoutWidget = QtWidgets.QWidget(self.splitter)
         self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
 
-        self.subtitle_layout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
+        self.subtitle_layout = QtWidgets.QHBoxLayout(self.verticalLayoutWidget)
         self.subtitle_layout.setContentsMargins(3, 0, 0, 0)
         self.subtitle_layout.setObjectName("subtitle_layout")
 
+        source_area_layout=QtWidgets.QVBoxLayout()
         self.subtitle_area = TextGetdir(self)
         self.subtitle_area.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred)
         self.subtitle_area.setPlaceholderText(
             f"{config.transobj['zimubianjitishi']}\n\n{config.transobj['subtitle_tips']}\n\n{config.transobj['meitiaozimugeshi']}")
+        self.import_sub = QtWidgets.QPushButton(self.verticalLayoutWidget)
+        self.import_sub.setObjectName("import_sub")
+        source_area_layout.addWidget(self.subtitle_area)
+        source_area_layout.addWidget(self.import_sub)
 
-        self.subtitle_hbox_layout=QtWidgets.QHBoxLayout()
-        self.subtitle_hbox_layout.addWidget(self.subtitle_area,1)
+        # self.subtitle_hbox_layout=QtWidgets.QHBoxLayout()
+        # self.subtitle_hbox_layout.addWidget(self.subtitle_area,1)
 
-        self.target_subtitle_area=QtWidgets.QPlainTextEdit()
-        self.target_subtitle_area.setPlaceholderText('翻译后的字幕' if config.defaulelang=='zh' else 'Translated Subtitle')
-        self.target_subtitle_area.setVisible(False)
-        self.subtitle_hbox_layout.addWidget(self.target_subtitle_area,1)
+        self.target_subtitle_area= QtWidgets.QVBoxLayout() #QtWidgets.QPlainTextEdit()
+        # self.target_subtitle_area.setPlaceholderText('翻译后的字幕' if config.defaulelang=='zh' else 'Translated Subtitle')
+        # self.target_subtitle_area.setVisible(False)
+        # self.subtitle_hbox_layout.addWidget(self.target_subtitle_area,1)
 
 
-        self.subtitle_layout.addLayout(self.subtitle_hbox_layout)
+        self.subtitle_layout.addLayout(source_area_layout)
+        self.subtitle_layout.addLayout(self.target_subtitle_area)
 
-        self.layout_sub_bottom = QtWidgets.QHBoxLayout()
-        self.layout_sub_bottom.setObjectName("layout_sub_bottom")
+        # self.layout_sub_bottom = QtWidgets.QHBoxLayout()
+        # self.layout_sub_bottom.setObjectName("layout_sub_bottom")
 
         #
-        self.import_sub = QtWidgets.QPushButton(self.verticalLayoutWidget)
-        self.import_sub.setObjectName("import_sub")
-        self.layout_sub_bottom.addWidget(self.import_sub)
+
+        # self.layout_sub_bottom.addWidget(self.import_sub)
         #
-        self.export_sub = QtWidgets.QPushButton(self.verticalLayoutWidget)
-        self.export_sub.setObjectName("export_sub")
+        # self.export_sub = QtWidgets.QPushButton(self.verticalLayoutWidget)
+        # self.export_sub.setObjectName("export_sub")
         #
-        self.layout_sub_bottom.addWidget(self.export_sub)
+        # self.layout_sub_bottom.addWidget(self.export_sub)
         #
-        self.set_line_role = QtWidgets.QPushButton(self.verticalLayoutWidget)
-        self.set_line_role.setObjectName("set_line_role")
+        # self.set_line_role = QtWidgets.QPushButton(self.verticalLayoutWidget)
+        # self.set_line_role.setObjectName("set_line_role")
         #
-        self.layout_sub_bottom.addWidget(self.set_line_role)
-        self.subtitle_layout.addLayout(self.layout_sub_bottom)
+        # self.layout_sub_bottom.addWidget(self.set_line_role)
+        # self.subtitle_layout.addLayout(self.layout_sub_bottom)
 
         self.horizontalLayout_7.addWidget(self.splitter)
         MainWindow.setCentralWidget(self.centralwidget)
@@ -998,7 +1003,7 @@ def retranslateUi(self):
         self.stop_djs.setText(config.uilanglist.get("Pause"))
         self.import_sub.setText(config.uilanglist.get("Import srt"))
 
-        self.set_line_role.setText(config.uilanglist.get("Set role by line"))
+        # self.set_line_role.setText(config.uilanglist.get("Set role by line"))
         self.menu_Key.setTitle(config.uilanglist.get("&Setting"))
         self.menu_TTS.setTitle(config.uilanglist.get("&TTSsetting"))
         self.menu_RECOGN.setTitle(config.uilanglist.get("&RECOGNsetting"))

diff --git a/videotrans/ui/setini.py b/videotrans/ui/setini.py
@@ -103,6 +103,7 @@ def setupUi(self, setini):
                 "cuda_qp": "是否在 NVIDIA cuda上使用 qp代替crf",
                 "preset": "主要调节编码速度和质量的平衡，有ultrafast、superfast、veryfast、faster、fast、medium、slow、slower、veryslow 选项，编码速度从快到慢、压缩率从低到高、视频尺寸从大到小。 ",
                 "ffmpeg_cmd": "自定义ffmpeg命令参数， 将添加在倒数第二个位置上,例如  -bf 7 -b_ref_mode middle",
+                "cuda_decode":"使用cuda解码视频",
                 "video_codec": "采用 libx264 编码或 libx265编码，264兼容性更好，265压缩比更大清晰度更高"
             },
 
@@ -195,6 +196,7 @@ def setupUi(self, setini):
             "is_queue": "视频翻译排队处理(默认交叉)",
             "lang": "界面语言",
             "crf": "视频转码损失控制",
+            "cuda_decode":"使用cuda解码视频",
             "cuda_qp": "NVIDIA使用qp代替crf",
             "preset": "输出视频质量压缩率控制",
             "ffmpeg_cmd": "自定义ffmpeg命令参数",
@@ -304,6 +306,7 @@ def setupUi(self, setini):
                 "video": {
                     "crf": "Loss control during video transcoding, 0 = minimum loss, 51 = maximum loss, default is 13",
                     "cuda_qp": "Whether to use qp instead of crf on NVIDIA cuda",
+                    "cuda_decode":"Decode the video using cuda",
                     "preset": "Mainly adjust the balance of encoding speed and quality, there are ultrafast, superfast, veryfast, fast, fast, medium, slow, slow, veryslow options, encoding speed from fast to slow, compression rate from low to high, video size from large to small.",
                     "ffmpeg_cmd": "Custom ffmpeg command parameters, added at the penultimate position, e.g., -bf 7 -b_ref_mode middle",
                     "video_codec": "Use libx264 or libx265 encoding, 264 has better compatibility, 265 has higher compression ratio and clarity"
@@ -410,6 +413,7 @@ def setupUi(self, setini):
                 "aisendsrt":"Sending full subtitle content when ai translation",
                 "crf": "Video Transcoding Loss Control",
                 "cuda_qp": "NVIDIA Use QP Instead of CRF",
+                "cuda_decode":"Decode the video using cuda",
                 "preset": "Output Video Quality compression rate",
                 "ffmpeg_cmd": "Custom FFmpeg Command Parameters",
                 "video_codec": "H.264 or H.265 Video Encoding",

diff --git a/videotrans/util/tools.py b/videotrans/util/tools.py
@@ -451,22 +451,35 @@ def runffmpeg(arg, *, noextname=None, uuid=None,force_cpu=False):
     file_name=""
 
     cmd = [config.FFMPEG_BIN, "-hide_banner", "-ignore_unknown"]
-    # 启用了CUDA 并且没有禁用GPU
     # 默认视频编码 libx264 / libx265
     default_codec = f"libx{config.settings['video_codec']}"
 
+    # 尝试cuda加速解码编码
     if not force_cpu and default_codec in arg and config.video_codec != default_codec:
         if not config.video_codec:
             config.video_codec = get_video_codec()
-
+        # 判断第一个输入是不是mp4，是则尝试cuda解码
+        has_mp4=False
+        # 插入解码位置
+        insert_index=-1
         for i, it in enumerate(arg):
+            if insert_index==-1 and arg[i]=='-i':
+                insert_index=i
+                has_mp4=True if arg[i+1][-3:] in ['mp4','txt'] else False
+
             if i > 0 and arg[i - 1] == '-c:v' and arg[i] !='copy':
                 arg[i] = config.video_codec
             elif it == '-crf' and config.settings['cuda_qp'] and re.search(r'\sh(264|evc)_nvenc\s'," ".join(cmd),re.I):
                 arg[i] = '-qp'
                 if arg[i]=='copy':
                     arg[i+1]='0'
-
+        # When the first -i input is an mp4 (or a txt concat list), the final output is mp4 and an NVENC
+        # encoder is in use, try CUDA decoding too. Error-prone in practice because of GPU compatibility.
+        if config.settings.get('cuda_decode',False) and insert_index>-1 and has_mp4 and arg[-1][-3:]=='mp4' and config.video_codec in ['h264_nvenc','hevc_nvenc']:
+            # Decoder options are input options: splice them in front of the first -i (insert_index),
+            # not at the stale loop index i, which points at the output path after the loop ends.
+            cuvid_decoder='h264_cuvid' if config.video_codec=='h264_nvenc' else 'hevc_cuvid'
+            arg[insert_index:insert_index]=['-hwaccel','cuda','-c:v',cuvid_decoder]
 
     cmd += arg
     if Path(cmd[-1]).is_file():