From aa86597d5e65c9c40b12841a945be7a3cd0fb7b3 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:36:24 +0300 Subject: [PATCH] Encoder improvements (#127) * Encoder improvements * preserve aspect ratio using -2 * add comment * restore lost comment * update comment * format code use h.unpack() to avoid crashing mpvacious * add comment * add function quality_to_crf_avif() * extract function * extract function * remove duplicate numbers * add function quality_to_crf_jpeg() * Disable loudnorm for compatibility; user filters fix; refactor quality and scale functions * Mention opus_container in README --------- Co-authored-by: Ren Tatsumoto --- .github/RELEASE/subs2srs.conf | 46 ++- README.md | 11 +- cfg_mgr.lua | 43 ++- encoder.lua | 596 +++++++++++++++++++++++++--------- subs2srs.lua | 60 ++-- 5 files changed, 559 insertions(+), 197 deletions(-) diff --git a/.github/RELEASE/subs2srs.conf b/.github/RELEASE/subs2srs.conf index 77739fd..ee98436 100644 --- a/.github/RELEASE/subs2srs.conf +++ b/.github/RELEASE/subs2srs.conf @@ -106,7 +106,7 @@ append_media=yes # Remove text in brackets before substituting %n into tag tag_nuke_brackets=yes -# Remove text in brackets before substituting %n into tag +# Remove text in parentheses before substituting %n into tag tag_nuke_parentheses=no # Remove the episode number before substituting %n into tag @@ -182,6 +182,10 @@ audio_template=[sound:%s] # If enabled, generates animated snapshots (something like GIFs) instead of static snapshots. animated_snapshot_enabled=no +# Animated snapshot format. Like "snapshot_format" but for animated images. Can be either avif or webp. 
+animated_snapshot_format=avif +#animated_snapshot_format=webp + # Number of frame per seconds, a value between 0 and 30 (30 included) # Higher values will increase both quality and file size, lower values will do the opposite animated_snapshot_fps=10 @@ -201,11 +205,23 @@ animated_snapshot_quality=5 # Audio format. # Opus is the recommended format. -# It may be required to use a different format when Opus is not supported. -# This is the case on certain computers or devices which are running proprietary operating systems. audio_format=opus #audio_format=mp3 -#audio_format=caf + +# Container for opus files. +# It may be required to use a different container for Opus. +# This is the case on certain computers or devices +# which are running proprietary operating systems, e.g. AnkiMobile. Using them is discouraged. +# ・ Ogg/Opus play everywhere except AnkiWeb in Safari and AnkiMobile. +# ・ M4A (iOS 17.2 and probably even earlier) and WEBM (since iOS 17.4) play everywhere. +# ・ Opus in CAF can be used with older iOS. CAF plays only on Anki Desktop, Safari and AnkiMobile. +# ・ (iOS Lockdown Mode disables Opus support completely, +# though you may try to add an exception for AnkiMobile.) +opus_container=ogg +#opus_container=opus +#opus_container=m4a +#opus_container=webm +#opus_container=caf # Sane values are 16k-32k for opus, 64k-128k for mp3. audio_bitrate=24k @@ -275,6 +291,16 @@ secondary_sub_area=0.15 # Default binding to cycle this value: Ctrl+v. secondary_sub_visibility=auto +# Perform two-pass loudness normalization. +# Parameter explanation can be found e.g. at: +# https://auphonic.com/blog/2013/01/07/loudness-targets-mobile-audio-podcasts-radio-tv/ +# https://auphonic.com/blog/2019/08/19/dynamic-range-processing/ +# MAKE SURE TO REMOVE loudnorm FROM CUSTOM ARGS BEFORE ENABLING. +loudnorm=no +loudnorm_target=-16 +loudnorm_range=11 +loudnorm_peak=-1.5 + ## ## Custom audio encoding arguments ## These arguments are added to the command line. 
@@ -282,11 +308,19 @@ secondary_sub_visibility=auto ## Feel free to experiment for yourself, but be careful or media creation might stop working. ## +# loudnorm IN CUSTOM ARGS IS LEFT FOR BACKWARD COMPATIBILITY. +# MAKE SURE TO REMOVE ALL MENTIONS OF loudnorm FROM CUSTOM ARGS +# (E.G. SET TO EMPTY STRINGS) BEFORE ENABLING TWO-PASS loudnorm. +# ENABLING loudnorm BOTH THROUGH THE SWITCH AND THROUGH CUSTOM ARGS +# CAN LEAD TO UNPREDICTABLE RESULTS. + # Ffmpeg -ffmpeg_audio_args=-af loudnorm=I=-16:TP=-1.5:LRA=11 +ffmpeg_audio_args=-af loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true +#ffmpeg_audio_args= #ffmpeg_audio_args=-af silenceremove=1:0:-50dB # mpv # mpv accepts each filter as a separate argument, e.g. --af-append=1 --af-append=2 -mpv_audio_args=--af-append=loudnorm=I=-16:TP=-1.5:LRA=11 +mpv_audio_args=--af-append=loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true +#mpv_audio_args= #mpv_audio_args=--af-append=silenceremove=1:0:-50dB diff --git a/README.md b/README.md index 7c517da..75bc8e9 100644 --- a/README.md +++ b/README.md @@ -198,11 +198,12 @@ and `avif` or `webp` for images, as they greatly reduce the size of the generated files. If you still use AnkiMobile (the [proprietary](https://www.gnu.org/proprietary/) Anki app), -set `audio_format` to [caf](https://en.wikipedia.org/wiki/Core_Audio_Format) for compatibility. -The resulting files will use `Opus` as the coding format and Apple's Core Audio -format as the container format, but will still have the `.ogg` extension to make -it easier to open in media players because the `.caf` extension is not commonly -recognized. +set `opus_container` to `m4a` or `webm`. It'll allow iOS to play Opus files, while still maintaining +compatibility with non-Apple devices. For really old iOS devices, set `opus_container` to +[`caf`](https://en.wikipedia.org/wiki/Core_Audio_Format). CAF plays only on Anki Desktop, +AnkiWeb in Safari and AnkiMobile, and is really not recommended. 
(Please note that +[Lockdown Mode](https://support.apple.com/en-us/105120) completely disables Opus and AVIF support, +though you may try to add an exception for AnkiMobile.) If no matter what mpvacious fails to create audio clips and/or snapshots, change `use_ffmpeg` to `yes`. diff --git a/cfg_mgr.lua b/cfg_mgr.lua index b5cf555..96b6859 100644 --- a/cfg_mgr.lua +++ b/cfg_mgr.lua @@ -11,6 +11,13 @@ local h = require('helpers') local min_side_px = 42 local max_side_px = 640 +local default_height_px = 200 + +-- This constant should be used in place of width and/or height in the config file. +-- It tells the encoder to preserve aspect ratio when downscaling snapshots. +-- The user almost always wants to set either width or height to this value. +-- Note: If set to -1, encoding will fail with the "height/width not divisible by 2" error. +local preserve_aspect_ratio = -2 local self = { config = nil, @@ -21,12 +28,23 @@ local self = { local default_profile_filename = 'subs2srs' local profiles_filename = 'subs2srs_profiles' +local function set_file_extension_for_opus() + -- Default to OGG, then change if an extension is supported. + -- https://en.wikipedia.org/wiki/Core_Audio_Format + self.config.audio_extension = '.ogg' + for _, extension in ipairs({ 'opus', 'm4a', 'webm', 'caf' }) do + if extension == self.config.opus_container then + self.config.audio_extension = '.' .. 
self.config.opus_container + break + end + end +end + local function set_audio_format() - if self.config.audio_format == 'opus' or self.config.audio_format == 'caf' then + if self.config.audio_format == 'opus' then -- https://opus-codec.org/ - -- https://en.wikipedia.org/wiki/Core_Audio_Format self.config.audio_codec = 'libopus' - self.config.audio_extension = '.ogg' + set_file_extension_for_opus() else self.config.audio_codec = 'libmp3lame' self.config.audio_extension = '.mp3' @@ -44,20 +62,27 @@ local function set_video_format() self.config.snapshot_extension = '.jpg' self.config.snapshot_codec = 'mjpeg' end + -- Animated webp images can only have .webp extension. - -- The user has no choice on this. - self.config.animated_snapshot_extension = '.webp' + -- The user has no choice on this. Same logic for avif. + if self.config.animated_snapshot_format == 'avif' then + self.config.animated_snapshot_extension = '.avif' + self.config.animated_snapshot_codec = 'libaom-av1' + else + self.config.animated_snapshot_extension = '.webp' + self.config.animated_snapshot_codec = 'libwebp' + end end local function ensure_in_range(dimension) - self.config[dimension] = self.config[dimension] < min_side_px and -2 or self.config[dimension] + self.config[dimension] = self.config[dimension] < min_side_px and preserve_aspect_ratio or self.config[dimension] self.config[dimension] = self.config[dimension] > max_side_px and max_side_px or self.config[dimension] end local function conditionally_set_defaults(width, height, quality) if self.config[width] < 1 and self.config[height] < 1 then - self.config[width] = -2 - self.config[height] = 200 + self.config[width] = preserve_aspect_ratio + self.config[height] = default_height_px end if self.config[quality] < 0 or self.config[quality] > 100 then self.config[quality] = 15 @@ -171,4 +196,6 @@ return { reload_from_disk = reload_from_disk, init = init, next_profile = next_profile, + default_height_px = default_height_px, + preserve_aspect_ratio = 
preserve_aspect_ratio, } diff --git a/encoder.lua b/encoder.lua index 7dd3b5d..e70f93b 100644 --- a/encoder.lua +++ b/encoder.lua @@ -20,7 +20,6 @@ local self = { platform = nil, encoder = nil, output_dir_path = nil, - max_avif_crf = 63, } ------------------------------------------------------------ @@ -67,9 +66,26 @@ local function toms(timestamp) return string.format("%.3f", timestamp) end -local function quality_to_crf(quality, max_crf) - -- Quality is from 0 to 100. (for avif images) CRF is from 0 to 63 and reversed. - return math.floor((100 - quality) / 100 * max_crf) +local function fit_quality_percentage_to_range(quality, worst_val, best_val) + local scaled = worst_val + (best_val - worst_val) * quality / 100 + -- Round to the nearest integer that's better in quality. + if worst_val > best_val then + return math.floor(scaled) + end + return math.ceil(scaled) +end + +local function quality_to_crf_avif(quality_value) + -- Quality is from 0 to 100. For avif images CRF is from 0 to 63 and reversed. + local worst_avif_crf = 63 + local best_avif_crf = 0 + return fit_quality_percentage_to_range(quality_value, worst_avif_crf, best_avif_crf) +end + +local function quality_to_jpeg_qscale(quality_value) + local worst_jpeg_quality = 31 + local best_jpeg_quality = 2 + return fit_quality_percentage_to_range(quality_value, worst_jpeg_quality, best_jpeg_quality) end ------------------------------------------------------------ @@ -77,88 +93,208 @@ end local ffmpeg = {} -ffmpeg.prefix = { find_exec("ffmpeg"), "-hide_banner", "-nostdin", "-y", "-loglevel", "quiet", "-sn", } +ffmpeg.exec = find_exec("ffmpeg") -ffmpeg.prepend = function(args) - if next(args) ~= nil then - for i, value in ipairs(ffmpeg.prefix) do - table.insert(args, i, value) - end - end - return args +ffmpeg.prepend = function(...) 
+ return { + ffmpeg.exec, "-hide_banner", "-nostdin", "-y", "-loglevel", "quiet", "-sn", + ..., + } +end + +local function make_scale_filter(algorithm, width, height) + -- algorithm is either "sinc" or "lanczos" + -- Static image scaling uses "sinc", which is the best downscaling algorithm: https://stackoverflow.com/a/6171860 + -- Animated images use Lanczos, which is faster. + return string.format( + "scale='min(%d,iw)':'min(%d,ih)':flags=%s+accurate_rnd", + width, height, algorithm + ) +end + +local function static_scale_filter() + return make_scale_filter('sinc', self.config.snapshot_width, self.config.snapshot_height) +end + +local function animated_scale_filter() + return make_scale_filter( + 'lanczos', self.config.animated_snapshot_width, self.config.animated_snapshot_height) end ffmpeg.make_static_snapshot_args = function(source_path, output_path, timestamp) - local args = ffmpeg.prepend { - '-an', - '-ss', toms(timestamp), - '-i', source_path, - '-map_metadata', '-1', - '-vcodec', self.config.snapshot_codec, - '-lossless', '0', - '-compression_level', '6', - '-qscale:v', tostring(self.config.snapshot_quality), - '-vf', string.format('scale=%d:%d', self.config.snapshot_width, self.config.snapshot_height), - '-vframes', '1', - output_path - } + local encoder_args if self.config.snapshot_format == 'avif' then - -- Avif quality can be controlled with crf. - table.insert(args, #args, '-crf') - table.insert(args, #args, tostring(quality_to_crf(self.config.snapshot_quality, self.max_avif_crf))) + encoder_args = { + '-c:v', 'libaom-av1', + -- cpu-used < 6 can take a lot of time to encode. + '-cpu-used', '6', + -- Avif quality can be controlled with crf. 
+ '-crf', tostring(quality_to_crf_avif(self.config.snapshot_quality)), + '-still-picture', '1', + } + elseif self.config.snapshot_format == 'webp' then + encoder_args = { + '-c:v', 'libwebp', + '-compression_level', '6', + '-quality', tostring(self.config.snapshot_quality), + } + else + encoder_args = { + '-c:v', 'mjpeg', + '-q:v', tostring(quality_to_jpeg_qscale(self.config.snapshot_quality)), + } end + + local args = ffmpeg.prepend( + '-an', + '-ss', toms(timestamp), + '-i', source_path, + '-map_metadata', '-1', + '-vf', static_scale_filter(), + '-frames:v', '1', + h.unpack(encoder_args) + ) + table.insert(args, output_path) + return args +end + +ffmpeg.make_animated_snapshot_args = function(source_path, output_path, start_timestamp, end_timestamp) + local encoder_args + if self.config.animated_snapshot_format == 'avif' then + encoder_args = { + '-c:v', 'libaom-av1', + -- cpu-used < 6 can take a lot of time to encode. + '-cpu-used', '6', + -- Avif quality can be controlled with crf. + '-crf', tostring(quality_to_crf_avif(self.config.animated_snapshot_quality)), + } + else + -- Documentation: https://www.ffmpeg.org/ffmpeg-all.html#libwebp + encoder_args = { + '-c:v', 'libwebp', + '-compression_level', '6', + '-quality', tostring(self.config.animated_snapshot_quality), + } + end + + local args = ffmpeg.prepend( + '-an', + '-ss', toms(start_timestamp), + '-to', toms(end_timestamp), + '-i', source_path, + '-map_metadata', '-1', + '-loop', '0', + '-vf', string.format( + 'fps=%d,%s', self.config.animated_snapshot_fps, animated_scale_filter()), + h.unpack(encoder_args) + ) + table.insert(args, output_path) return args end -ffmpeg.animated_snapshot_filters = function() +local function make_loudnorm_targets() return string.format( - "fps=%d,scale=%d:%d:flags=lanczos", - self.config.animated_snapshot_fps, - self.config.animated_snapshot_width, - self.config.animated_snapshot_height + 'loudnorm=I=%s:LRA=%s:TP=%s:dual_mono=true', + self.config.loudnorm_target, + 
self.config.loudnorm_range, + self.config.loudnorm_peak ) end -ffmpeg.make_animated_snapshot_args = function(source_path, output_path, start_timestamp, end_timestamp) - -- Documentation: https://www.ffmpeg.org/ffmpeg-all.html#libwebp - return ffmpeg.prepend { - '-an', - '-ss', toms(start_timestamp), - '-t', toms(end_timestamp - start_timestamp), - '-i', source_path, - '-map_metadata', '-1', - '-vcodec', 'libwebp', - '-loop', '0', - '-lossless', '0', - '-compression_level', '6', - '-quality', tostring(self.config.animated_snapshot_quality), - '-vf', ffmpeg.animated_snapshot_filters(), - output_path - } +local function parse_loudnorm(loudnorm_targets, json_extractor, loudnorm_consumer) + local function warn() + msg.warn('Failed to measure loudnorm stats, falling back on dynamic loudnorm.') + end + + return function(success, result) + local json + if success and result.status == 0 then + json = json_extractor(result.stdout, result.stderr) + end + + if json == nil then + warn() + loudnorm_consumer(loudnorm_targets) + return + end + + local loudnorm_args = { loudnorm_targets } + local function add_arg(name, val) + -- loudnorm sometimes fails to gather stats for extremely short inputs. + -- Simply omit the stat to fall back on dynamic loudnorm. 
+ if val ~= '-inf' and val ~= 'inf' then + table.insert(loudnorm_args, string.format('%s=%s', name, val)) + else + warn() + end + end + + local stats = utils.parse_json(json) + add_arg('measured_I', stats.input_i) + add_arg('measured_LRA', stats.input_lra) + add_arg('measured_TP', stats.input_tp) + add_arg('measured_thresh', stats.input_thresh) + add_arg('offset', stats.target_offset) + + loudnorm_consumer(table.concat(loudnorm_args, ':')) + end end ffmpeg.append_user_audio_args = function(args) - local args_iter = string.gmatch(self.config.ffmpeg_audio_args, "%S+") - local filters = ( - self.config.tie_volumes - and string.format("volume=%.1f", mp.get_property_native('volume') / 100.0) - or "" - ) - for arg in args_iter do - if arg == '-af' or arg == '-filter:a' then - filters = #filters > 0 and string.format("%s,%s", args_iter(), filters) or args_iter() + local new_args = {} + local filters = '' + + local function add_filter(flt) + if #filters == 0 then + filters = flt else - table.insert(args, #args, arg) + filters = string.format('%s,%s', filters, flt) end end + + local function separate_filters(args) + -- Would've strongly preferred + -- if args[i] == '-af' or args[i] == '-filter:a' then + -- i = i + 1 + -- add_filter(args[i]) + -- but https://lua.org/manual/5.4/manual.html#3.3.5 says that + -- "You should not change the value of the control variable during the loop." 
+ local expect_filter = false + for i = 1, #args do + if args[i] == '-af' or args[i] == '-filter:a' then + expect_filter = true + else + if expect_filter then + add_filter(args[i]) + else + table.insert(new_args, args[i]) + end + expect_filter = false + end + end + end + + separate_filters(args) + if self.config.tie_volumes then + add_filter(string.format("volume=%.1f", mp.get_property_native('volume') / 100.0)) + end + + local user_args = {} + for arg in string.gmatch(self.config.ffmpeg_audio_args, "%S+") do + table.insert(user_args, arg) + end + separate_filters(user_args) + if #filters > 0 then - table.insert(args, #args, '-af') - table.insert(args, #args, filters) + table.insert(new_args, '-af') + table.insert(new_args, filters) end - return args + return new_args end -ffmpeg.make_audio_args = function(source_path, output_path, start_timestamp, end_timestamp) +ffmpeg.make_audio_args = function( + source_path, output_path, start_timestamp, end_timestamp, args_consumer +) local audio_track = h.get_active_track('audio') local audio_track_id = audio_track['ff-index'] @@ -167,23 +303,76 @@ ffmpeg.make_audio_args = function(source_path, output_path, start_timestamp, end audio_track_id = 'a' end - local args = ffmpeg.prepend { - '-vn', - '-ss', toms(start_timestamp), - '-to', toms(end_timestamp), - '-i', source_path, - '-map_metadata', '-1', - '-map', string.format("0:%s", tostring(audio_track_id)), - '-ac', '1', - '-codec:a', self.config.audio_codec, - '-f', self.config.audio_format, - '-vbr', 'on', - '-compression_level', '10', - '-application', 'voip', - '-b:a', tostring(self.config.audio_bitrate), - output_path - } - return ffmpeg.append_user_audio_args(args) + local function make_ffargs(...) + return ffmpeg.append_user_audio_args( + ffmpeg.prepend( + '-vn', + '-ss', toms(start_timestamp), + '-to', toms(end_timestamp), + '-i', source_path, + '-map_metadata', '-1', + '-map_chapters', '-1', + '-map', string.format("0:%s", tostring(audio_track_id)), + '-ac', '1', + ... 
+ ) + ) + end + + local function make_encoding_args(loudnorm_args) + local encoder_args + if self.config.audio_format == 'opus' then + encoder_args = { + '-c:a', 'libopus', + '-application', 'voip', + '-apply_phase_inv', '0', -- Improves mono audio. + } + if self.config.opus_container == 'm4a' then + table.insert(encoder_args, '-f') + table.insert(encoder_args, 'mp4') + end + else + -- https://wiki.hydrogenaud.io/index.php?title=LAME#Recommended_encoder_settings: + -- "For very low bitrates, up to 100kbps, ABR is most often the best solution." + encoder_args = { + '-c:a', 'libmp3lame', + '-compression_level', '0', + '-abr', '1', + } + end + + encoder_args = { '-b:a', tostring(self.config.audio_bitrate), h.unpack(encoder_args) } + if loudnorm_args then + table.insert(encoder_args, '-af') + table.insert(encoder_args, loudnorm_args) + end + local args = make_ffargs(h.unpack(encoder_args)) + table.insert(args, output_path) + args_consumer(args) + end + + if not self.config.loudnorm then + make_encoding_args(nil) + return + end + + local loudnorm_targets = make_loudnorm_targets() + local args = make_ffargs('-loglevel', 'info', + '-af', loudnorm_targets .. ':print_format=json') + table.insert(args, '-f') + table.insert(args, 'null') + table.insert(args, '-') + h.subprocess( + args, + parse_loudnorm( + loudnorm_targets, + function(stdout, stderr) + local start, stop, json = string.find(stderr, '%[Parsed_loudnorm_0.-({.-})') + return json + end, + make_encoding_args + ) + ) end ------------------------------------------------------------ @@ -193,55 +382,89 @@ local mpv = { } mpv.exec = find_exec("mpv") -mpv.make_static_snapshot_args = function(source_path, output_path, timestamp) - local args = { +mpv.prepend_common_args = function(source_path, ...) 
+ return { mpv.exec, source_path, + '--no-config', '--loop-file=no', '--keep-open=no', - '--audio=no', - '--no-ocopy-metadata', '--no-sub', - '--frames=1', - '--ovcopts-add=lossless=0', - '--ovcopts-add=compression_level=6', - table.concat { '--ovc=', self.config.snapshot_codec }, - table.concat { '-start=', toms(timestamp), }, - table.concat { '--ovcopts-add=quality=', tostring(self.config.snapshot_quality) }, - table.concat { '--vf-add=scale=', self.config.snapshot_width, ':', self.config.snapshot_height }, - table.concat { '-o=', output_path } + '--no-ocopy-metadata', + ..., } +end + +mpv.make_static_snapshot_args = function(source_path, output_path, timestamp) + local encoder_args if self.config.snapshot_format == 'avif' then - -- Avif quality can be controlled with crf. - table.insert(args, #args, string.format('--ovcopts-add=crf=%d', quality_to_crf(self.config.snapshot_quality, self.max_avif_crf))) + encoder_args = { + '--ovc=libaom-av1', + -- cpu-used < 6 can take a lot of time to encode. + '--ovcopts-add=cpu-used=6', + string.format('--ovcopts-add=crf=%d', quality_to_crf_avif(self.config.snapshot_quality)), + '--ovcopts-add=still-picture=1', + } + elseif self.config.snapshot_format == 'webp' then + encoder_args = { + '--ovc=libwebp', + '--ovcopts-add=compression_level=6', + string.format('--ovcopts-add=quality=%d', self.config.snapshot_quality), + } + else + encoder_args = { + '--ovc=mjpeg', + '--vf-add=scale=out_range=jpeg', + string.format( + '--ovcopts=global_quality=%d*QP2LAMBDA,flags=+qscale', + quality_to_jpeg_qscale(self.config.snapshot_quality) + ), + } end - return args + + return mpv.prepend_common_args( + source_path, + '--audio=no', + '--frames=1', + '--start=' .. toms(timestamp), + string.format('--vf-add=lavfi=[%s]', static_scale_filter()), + '-o=' .. 
output_path, + h.unpack(encoder_args) + ) end mpv.make_animated_snapshot_args = function(source_path, output_path, start_timestamp, end_timestamp) - return { - mpv.exec, - source_path, - '--loop-file=no', - '--keep-open=no', - '--ovc=libwebp', - '--of=webp', - '--ofopts-add=loop=0', - '--audio=no', - '--no-sub', - '--no-ocopy-metadata', - '--ovcopts-add=lossless=0', - '--ovcopts-add=compression_level=6', - table.concat { '--start=', toms(start_timestamp), }, - table.concat { '--end=', toms(end_timestamp), }, - table.concat { '--ovcopts-add=quality=', tostring(self.config.animated_snapshot_quality) }, - table.concat { '--vf-add=scale=', self.config.animated_snapshot_width, ':', self.config.animated_snapshot_height, ':flags=lanczos', }, - table.concat { '--vf-add=fps=', self.config.animated_snapshot_fps, }, - table.concat { '-o=', output_path }, - } + local encoder_args + if self.config.animated_snapshot_format == 'avif' then + encoder_args = { + '--ovc=libaom-av1', + -- cpu-used < 6 can take a lot of time to encode. + '--ovcopts-add=cpu-used=6', + string.format('--ovcopts-add=crf=%d', quality_to_crf_avif(self.config.animated_snapshot_quality)), + } + else + encoder_args = { + '--ovc=libwebp', + '--ovcopts-add=compression_level=6', + string.format('--ovcopts-add=quality=%d', self.config.animated_snapshot_quality), + } + end + + return mpv.prepend_common_args( + source_path, + '--audio=no', + '--start=' .. toms(start_timestamp), + '--end=' .. toms(end_timestamp), + '--ofopts-add=loop=0', + string.format('--vf-add=fps=%d', self.config.animated_snapshot_fps), + string.format('--vf-add=lavfi=[%s]', animated_scale_filter()), + '-o=' .. 
output_path, + h.unpack(encoder_args) + ) end -mpv.make_audio_args = function(source_path, output_path, start_timestamp, end_timestamp) +mpv.make_audio_args = function(source_path, output_path, + start_timestamp, end_timestamp, args_consumer) local audio_track = h.get_active_track('audio') local audio_track_id = mp.get_property("aid") @@ -250,31 +473,83 @@ mpv.make_audio_args = function(source_path, output_path, start_timestamp, end_ti audio_track_id = 'auto' end - local args = { - mpv.exec, - source_path, - '--loop-file=no', - '--video=no', - '--no-ocopy-metadata', - '--no-sub', - '--audio-channels=mono', - '--oacopts-add=vbr=on', - '--oacopts-add=application=voip', - '--oacopts-add=compression_level=10', - table.concat { '--oac=', self.config.audio_codec }, - table.concat { '--of=', self.config.audio_format }, - table.concat { '--start=', toms(start_timestamp), }, - table.concat { '--end=', toms(end_timestamp), }, - table.concat { '--aid=', audio_track_id }, - table.concat { '--volume=', self.config.tie_volumes and mp.get_property('volume') or '100' }, - table.concat { '--oacopts-add=b=', self.config.audio_bitrate }, - table.concat { '-o=', output_path } - } - for arg in string.gmatch(self.config.mpv_audio_args, "%S+") do - -- Prepend before output path - table.insert(args, #args, arg) + local function make_mpvargs(...) + local args = mpv.prepend_common_args( + source_path, + '--video=no', + '--aid=' .. audio_track_id, + '--audio-channels=mono', + '--start=' .. toms(start_timestamp), + '--end=' .. toms(end_timestamp), + string.format( + '--volume=%d', + self.config.tie_volumes and mp.get_property('volume') or 100 + ), + ... 
+ ) + for arg in string.gmatch(self.config.mpv_audio_args, "%S+") do + table.insert(args, arg) + end + return args end - return args + + local function make_encoding_args(loudnorm_args) + local encoder_args + if self.config.audio_format == 'opus' then + encoder_args = { + '--oac=libopus', + '--oacopts-add=application=voip', + '--oacopts-add=apply_phase_inv=0', -- Improves mono audio. + } + if self.config.opus_container == 'm4a' then + table.insert(encoder_args, '--of=mp4') + end + else + -- https://wiki.hydrogenaud.io/index.php?title=LAME#Recommended_encoder_settings: + -- "For very low bitrates, up to 100kbps, ABR is most often the best solution." + encoder_args = { + '--oac=libmp3lame', + '--oacopts-add=compression_level=0', + '--oacopts-add=abr=1', + } + end + + local args = make_mpvargs( + '--oacopts-add=b=' .. self.config.audio_bitrate, + '-o=' .. output_path, + h.unpack(encoder_args) + ) + if loudnorm_args then + table.insert(args, '--af-append=' .. loudnorm_args) + end + args_consumer(args) + end + + if not self.config.loudnorm then + make_encoding_args(nil) + return + end + + local loudnorm_targets = make_loudnorm_targets() + h.subprocess( + make_mpvargs( + '-v', + '--af-append=' .. loudnorm_targets .. 
':print_format=json', + '--ao=null', + '--of=null' + ), + parse_loudnorm( + loudnorm_targets, + function(stdout, stderr) + local start, stop, json = string.find(stdout, '%[ffmpeg%] ({.-})') + if json then + json = string.gsub(json, '%[ffmpeg%]', '') + end + return json + end, + make_encoding_args + ) + ) end ------------------------------------------------------------ @@ -299,12 +574,15 @@ local create_static_snapshot = function(timestamp, source_path, output_path, on_ end local report_creation_result = function(file_path) - if h.file_exists(file_path) then - msg.info(string.format("Created file: %s", file_path)) - return true - else - msg.error(string.format("Couldn't create file: %s", file_path)) - return false + return function(success, result) + -- result is nil on success for screenshot-to-file. + if success and (result == nil or result.status == 0) and h.file_exists(file_path) then + msg.info(string.format("Created file: %s", file_path)) + return true + else + msg.error(string.format("Couldn't create file: %s", file_path)) + return false + end end end @@ -318,10 +596,7 @@ local create_snapshot = function(start_timestamp, end_timestamp, current_timesta local source_path = mp.get_property("path") local output_path = utils.join_path(self.output_dir_path, filename) - local on_finish = function() - report_creation_result(output_path) - end - + local on_finish = report_creation_result(output_path) if self.config.animated_snapshot_enabled then create_animated_snapshot(start_timestamp, end_timestamp, source_path, output_path, on_finish) else @@ -352,15 +627,22 @@ local create_audio = function(start_timestamp, end_timestamp, filename, padding) start_timestamp, end_timestamp = pad_timings(padding, start_timestamp, end_timestamp) end - local args = self.encoder.make_audio_args(source_path, output_path, start_timestamp, end_timestamp) - local on_finish = function() - if report_creation_result(output_path) and self.config.preview_audio then - background_play(output_path, 
function() - print("Played file: " .. output_path) - end) + local function start_encoding(args) + local on_finish = function(success, result) + local conversion_check = report_creation_result(output_path) + if conversion_check(success, result) and self.config.preview_audio then + background_play(output_path, function() + print("Played file: " .. output_path) + end) + end end + + h.subprocess(args, on_finish) end - h.subprocess(args, on_finish) + + self.encoder.make_audio_args( + source_path, output_path, start_timestamp, end_timestamp, start_encoding + ) else print("Audio will not be created.") end diff --git a/subs2srs.lua b/subs2srs.lua index 35a1f30..76e43f5 100644 --- a/subs2srs.lua +++ b/subs2srs.lua @@ -35,6 +35,25 @@ Usage: For complete usage guide, see ]] +local mp = require('mp') +local utils = require('mp.utils') +local OSD = require('osd_styler') +local cfg_mgr = require('cfg_mgr') +local encoder = require('encoder') +local h = require('helpers') +local Menu = require('menu') +local ankiconnect = require('ankiconnect') +local switch = require('utils.switch') +local play_control = require('utils.play_control') +local secondary_sid = require('subtitles.secondary_sid') +local platform = require('platform.init') +local forvo = require('utils.forvo') +local subs_observer = require('subtitles.observer') +local menu + +------------------------------------------------------------ +-- default config + local config = { -- The user should not modify anything below. @@ -58,19 +77,21 @@ local config = { -- Snapshots snapshot_format = "avif", -- avif, webp or jpg snapshot_quality = 15, -- from 0=lowest to 100=highest - snapshot_width = -2, -- a positive integer or -2 for auto - snapshot_height = 200, -- same + snapshot_width = cfg_mgr.preserve_aspect_ratio, -- a positive integer or -2 for auto + snapshot_height = cfg_mgr.default_height_px, -- same screenshot = false, -- create a screenshot instead of a snapshot; see example config. 
-- Animations animated_snapshot_enabled = false, -- if enabled captures the selected segment of the video, instead of just a frame + animated_snapshot_format = "avif", -- avif or webp animated_snapshot_fps = 10, -- positive integer between 0 and 30 (30 included) - animated_snapshot_width = -2, -- positive integer or -2 to scale it maintaining ratio (height must not be -2 in that case) - animated_snapshot_height = 200, -- positive integer or -2 to scale it maintaining ratio (width must not be -2 in that case) + animated_snapshot_width = cfg_mgr.preserve_aspect_ratio, -- positive integer or -2 to scale it maintaining ratio (height must not be -2 in that case) + animated_snapshot_height = cfg_mgr.default_height_px, -- positive integer or -2 to scale it maintaining ratio (width must not be -2 in that case) animated_snapshot_quality = 5, -- positive integer between 0 and 100 (100 included) -- Audio clips audio_format = "opus", -- opus or mp3 + opus_container = "ogg", -- ogg, opus, m4a, webm or caf audio_bitrate = "18k", -- from 16k to 32k audio_padding = 0.12, -- Set a pad to the dialog timings. 0.5 = audio is padded by .5 seconds. 0 = disable. tie_volumes = false, -- if set to true, the volume of the outputted audio file depends on the volume of the player at the time of export @@ -81,9 +102,20 @@ local config = { menu_font_size = 25, show_selected_text = true, + -- Make sure to remove loudnorm from ffmpeg_audio_args and mpv_audio_args before enabling. + loudnorm = false, + loudnorm_target = -16, + loudnorm_range = 11, + loudnorm_peak = -1.5, + -- Custom encoding args - ffmpeg_audio_args = '-af loudnorm=I=-16:TP=-1.5:LRA=11', - mpv_audio_args = '--af-append=loudnorm=I=-16:TP=-1.5:LRA=11', + -- Defaults are for backward compatibility, in case someone + -- updates mpvacious without updating their config. + -- Better to remove loudnorm from custom args and enable two-pass loudnorm. 
+ -- Enabling loudnorm both through the separate switch and through custom args + -- can lead to unpredictable results. + ffmpeg_audio_args = '-af loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true', + mpv_audio_args = '--af-append=loudnorm=I=-16:TP=-1.5:LRA=11:dual_mono=true', -- Anki create_deck = false, -- automatically create a deck for new cards @@ -134,21 +166,6 @@ local profiles = { active = "subs2srs", } -local mp = require('mp') -local utils = require('mp.utils') -local OSD = require('osd_styler') -local cfg_mgr = require('cfg_mgr') -local encoder = require('encoder') -local h = require('helpers') -local Menu = require('menu') -local ankiconnect = require('ankiconnect') -local switch = require('utils.switch') -local play_control = require('utils.play_control') -local secondary_sid = require('subtitles.secondary_sid') -local platform = require('platform.init') -local forvo = require('utils.forvo') -local subs_observer = require('subtitles.observer') -local menu ------------------------------------------------------------ -- utility functions @@ -179,6 +196,7 @@ local codec_support = (function() return { snapshot = { + ['libaom-av1'] = is_image_supported('libaom-av1'), libwebp = is_image_supported('libwebp'), mjpeg = is_image_supported('mjpeg'), },