From 918c85eeb40cd24c62dc7e2a9c25885305290175 Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Wed, 22 May 2024 23:51:34 -0400 Subject: [PATCH] Add ffmpeg decoder, similar to the previous VS decoder --- CHANGELOG.md | 5 ++ Cargo.lock | 153 ++++++++++++++++++++++++++++++++++- Cargo.toml | 8 +- src/decoder.rs | 34 +++++++- src/ffmpeg.rs | 195 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + src/vapoursynth.rs | 2 +- src/y4m.rs | 25 +----- 8 files changed, 396 insertions(+), 29 deletions(-) create mode 100644 src/ffmpeg.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 26deee9..5ac2191 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## Version 0.12.0 + +- [Breaking] Move `VideoDetails` struct from `y4m` module to `decoder` module, since it is not specific to y4m +- Add support for Ffmpeg decoder (requires Cargo `ffmpeg` feature, disabled by default) + ## Version 0.11.0 - Add support for Vapoursynth decoder (requires Cargo `vapoursynth` feature, disabled by default) diff --git a/Cargo.lock b/Cargo.lock index 624e781..b678d62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "aligned-vec" version = "0.5.0" @@ -94,12 +103,13 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "av-scenechange" -version = "0.11.0" +version = "0.12.0" dependencies = [ "anyhow", "clap", "console", "fern", + "ffmpeg-the-third", "log", "rav1e", "serde", @@ -125,12 +135,38 @@ dependencies = [ "v_frame", ] +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.5.0", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + [[package]] name = "bitstream-io" version = "2.3.0" @@ -160,6 +196,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-expr" version = "0.15.8" @@ -176,6 +221,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clang-sys" 
+version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.4" @@ -287,6 +343,30 @@ dependencies = [ "log", ] +[[package]] +name = "ffmpeg-sys-the-third" +version = "2.0.0+ffmpeg-7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a82bfdb0a7925996707f0a7dc37b2f3251ff5a15d26e78c586adb60c240dedc5" +dependencies = [ + "bindgen", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ffmpeg-the-third" +version = "2.0.1+ffmpeg-7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4aa99eb55979d5c1db3b0b7a807a5e50dda07f5f6c2dbc6e9b50c205f611646" +dependencies = [ + "bitflags 2.5.0", + "ffmpeg-sys-the-third", + "libc", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -298,6 +378,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "hashbrown" version = "0.14.5" @@ -367,6 +453,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.155" @@ -384,6 +476,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets", +] + [[package]] name 
= "log" version = "0.4.21" @@ -666,6 +768,41 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "ryu" version = "1.0.18" @@ -721,6 +858,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "simd_helpers" version = "0.1.0" @@ -946,7 +1089,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7df702c65dec1cfa3b93f824a1e58d5b0fdb82ac8a722596f43d7214282f56" dependencies = [ "anyhow", - "bitflags", + "bitflags 1.3.2", "lazy_static", "thiserror", "vapoursynth-sys", @@ -961,6 +1104,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version-compare" version = "0.2.0" diff --git a/Cargo.toml 
b/Cargo.toml index 94cbee4..f688bb5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "av-scenechange" -version = "0.11.0" +version = "0.12.0" authors = ["Josh Holmer "] edition = "2021" description = "Estimates frames in a video where a scenecut would be ideal" @@ -24,6 +24,10 @@ fern = { version = "0.6", optional = true } tracing-subscriber = { version = "0.3.18", optional = true } tracing-chrome = { version = "0.7.1", optional = true } tracing = { version = "0.1.40", optional = true } +ffmpeg-the-third = { version = "2.0.1", optional = true, default-features = false, features = [ + "codec", + "format", +] } [dependencies.vapoursynth] version = "0.4.0" @@ -35,6 +39,7 @@ features = [ ] optional = true + [features] default = ["binary"] binary = ["clap", "serialize"] @@ -46,6 +51,7 @@ tracing = [ "dep:tracing", "rav1e/tracing", ] +ffmpeg = ["ffmpeg-the-third"] [[bin]] name = "av-scenechange" diff --git a/src/decoder.rs b/src/decoder.rs index bee3de7..a099e72 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,15 +1,18 @@ use std::io::Read; -use rav1e::{Frame, Pixel}; +use rav1e::prelude::{ChromaSamplePosition, ChromaSampling, Frame, Pixel, Rational}; +#[cfg(feature = "ffmpeg")] +use crate::ffmpeg::FfmpegDecoder; #[cfg(feature = "vapoursynth")] use crate::vapoursynth::VapoursynthDecoder; -use crate::y4m::VideoDetails; pub enum Decoder { Y4m(y4m::Decoder), #[cfg(feature = "vapoursynth")] Vapoursynth(VapoursynthDecoder), + #[cfg(feature = "ffmpeg")] + Ffmpeg(FfmpegDecoder), } impl Decoder { @@ -21,6 +24,8 @@ impl Decoder { Decoder::Y4m(dec) => Ok(crate::y4m::get_video_details(dec)), #[cfg(feature = "vapoursynth")] Decoder::Vapoursynth(dec) => dec.get_video_details(), + #[cfg(feature = "ffmpeg")] + Decoder::Ffmpeg(dec) => Ok(dec.video_details), } } @@ -35,6 +40,31 @@ impl Decoder { Decoder::Y4m(dec) => crate::y4m::read_video_frame::(dec, video_details), #[cfg(feature = "vapoursynth")] Decoder::Vapoursynth(dec) => 
dec.read_video_frame::(video_details), + #[cfg(feature = "ffmpeg")] + Decoder::Ffmpeg(dec) => dec.read_video_frame::(), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub struct VideoDetails { + pub width: usize, + pub height: usize, + pub bit_depth: usize, + pub chroma_sampling: ChromaSampling, + pub chroma_sample_position: ChromaSamplePosition, + pub time_base: Rational, +} + +impl Default for VideoDetails { + fn default() -> Self { + VideoDetails { + width: 640, + height: 480, + bit_depth: 8, + chroma_sampling: ChromaSampling::Cs420, + chroma_sample_position: ChromaSamplePosition::Unknown, + time_base: Rational { num: 30, den: 1 }, } } } diff --git a/src/ffmpeg.rs b/src/ffmpeg.rs new file mode 100644 index 0000000..4721296 --- /dev/null +++ b/src/ffmpeg.rs @@ -0,0 +1,195 @@ +extern crate ffmpeg_the_third as ffmpeg; + +use std::path::Path; + +use anyhow::bail; +use ffmpeg::codec::{decoder, packet}; +use ffmpeg::format::context; +use ffmpeg::media::Type; +use ffmpeg::{format, frame}; +use rav1e::color::{ChromaSamplePosition, ChromaSampling}; +use rav1e::data::Rational; +use rav1e::{Frame, Pixel}; + +use crate::decoder::VideoDetails; + +/// An interface that is used for decoding a video stream using ffmpeg +/// +/// There have been desync issues reported with this decoder +/// on some video files. Use at your own risk! 
+pub struct FfmpegDecoder { + input_ctx: context::Input, + decoder: decoder::Video, + pub video_details: VideoDetails, + frameno: usize, + stream_index: usize, + end_of_stream: bool, + eof_sent: bool, +} + +impl FfmpegDecoder { + /// Initialize a new ffmpeg decoder for a given input file + /// + /// # Errors + /// + /// - If ffmpeg is not available or not working on the system + /// - If the source contains no video + /// - If the source has an unsupported pixel format + pub fn new>(input: P) -> anyhow::Result { + ffmpeg::init()?; + + let input_ctx = format::input(&input)?; + let input = input_ctx + .streams() + .best(Type::Video) + .ok_or_else(|| anyhow::anyhow!("Could not find video stream"))?; + let stream_index = input.index(); + let mut decoder = ffmpeg::codec::context::Context::from_parameters(input.parameters())? + .decoder() + .video()?; + decoder.set_parameters(input.parameters())?; + + let frame_rate = input.avg_frame_rate(); + Ok(Self { + video_details: VideoDetails { + width: decoder.width() as usize, + height: decoder.height() as usize, + bit_depth: match decoder.format() { + format::pixel::Pixel::YUV420P + | format::pixel::Pixel::YUV422P + | format::pixel::Pixel::YUV444P + | format::pixel::Pixel::YUVJ420P + | format::pixel::Pixel::YUVJ422P + | format::pixel::Pixel::YUVJ444P => 8, + format::pixel::Pixel::YUV420P10LE + | format::pixel::Pixel::YUV422P10LE + | format::pixel::Pixel::YUV444P10LE => 10, + format::pixel::Pixel::YUV420P12LE + | format::pixel::Pixel::YUV422P12LE + | format::pixel::Pixel::YUV444P12LE => 12, + _ => { + bail!("Unsupported pixel format {:?}", decoder.format()); + } + }, + chroma_sampling: match decoder.format() { + format::pixel::Pixel::YUV420P + | format::pixel::Pixel::YUVJ420P + | format::pixel::Pixel::YUV420P10LE + | format::pixel::Pixel::YUV420P12LE => ChromaSampling::Cs420, + format::pixel::Pixel::YUV422P + | format::pixel::Pixel::YUVJ422P + | format::pixel::Pixel::YUV422P10LE + | format::pixel::Pixel::YUV422P12LE => 
ChromaSampling::Cs422, + format::pixel::Pixel::YUV444P + | format::pixel::Pixel::YUVJ444P + | format::pixel::Pixel::YUV444P10LE + | format::pixel::Pixel::YUV444P12LE => ChromaSampling::Cs444, + _ => { + bail!("Unsupported pixel format {:?}", decoder.format()); + } + }, + chroma_sample_position: match decoder.format() { + format::pixel::Pixel::YUV422P + | format::pixel::Pixel::YUV422P10LE + | format::pixel::Pixel::YUV422P12LE => ChromaSamplePosition::Vertical, + _ => ChromaSamplePosition::Colocated, + }, + time_base: Rational::new( + frame_rate.denominator() as u64, + frame_rate.numerator() as u64, + ), + }, + decoder, + input_ctx, + frameno: 0, + stream_index, + end_of_stream: false, + eof_sent: false, + }) + } + + fn decode_frame(&self, decoded: &frame::Video) -> Frame { + const SB_SIZE_LOG2: usize = 6; + const SB_SIZE: usize = 1 << SB_SIZE_LOG2; + const SUBPEL_FILTER_SIZE: usize = 8; + const FRAME_MARGIN: usize = 16 + SUBPEL_FILTER_SIZE; + const LUMA_PADDING: usize = SB_SIZE + FRAME_MARGIN; + + let mut f: Frame = Frame::new_with_padding( + self.video_details.width, + self.video_details.height, + self.video_details.chroma_sampling, + LUMA_PADDING, + ); + let width = self.video_details.width; + let height = self.video_details.height; + let bit_depth = self.video_details.bit_depth; + let bytes = if bit_depth > 8 { 2 } else { 1 }; + let (chroma_width, _) = self + .video_details + .chroma_sampling + .get_chroma_dimensions(width, height); + f.planes[0].copy_from_raw_u8(decoded.data(0), width * bytes, bytes); + f.planes[1].copy_from_raw_u8(decoded.data(1), chroma_width * bytes, bytes); + f.planes[2].copy_from_raw_u8(decoded.data(2), chroma_width * bytes, bytes); + f + } + + /// # Errors + /// + /// - If there are no frames remaining + pub fn read_video_frame(&mut self) -> anyhow::Result> { + // For some reason there's a crap ton of work needed to get ffmpeg to do something simple, + // because each codec has its own stupid way of doing things and they don't all + // 
decode the same way. + // + // Maybe ffmpeg could have made a simple, singular interface that does this for us, + // but noooooo. + // + // Reference: https://ffmpeg.org/doxygen/trunk/api-h264-test_8c_source.html#l00110 + loop { + // This iterator is actually really stupid... it doesn't reset itself after each `new`. + // But that solves our lifetime hell issues, ironically. + let packet = self + .input_ctx + .packets() + .next() + .and_then(Result::ok) + .map(|(_, packet)| packet); + + let mut packet = if let Some(packet) = packet { + packet + } else { + self.end_of_stream = true; + packet::Packet::empty() + }; + + if self.end_of_stream && !self.eof_sent { + let _ = self.decoder.send_eof(); + self.eof_sent = true; + } + + if self.end_of_stream || packet.stream() == self.stream_index { + let mut decoded = frame::Video::new( + self.decoder.format(), + self.video_details.width as u32, + self.video_details.height as u32, + ); + packet.set_pts(Some(self.frameno as i64)); + packet.set_dts(Some(self.frameno as i64)); + + if !self.end_of_stream { + let _ = self.decoder.send_packet(&packet); + } + + if self.decoder.receive_frame(&mut decoded).is_ok() { + let f = self.decode_frame(&decoded); + self.frameno += 1; + return Ok(f); + } else if self.end_of_stream { + bail!("No frames left"); + } + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 788fa1d..6d97dec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,6 +90,9 @@ #![warn(clippy::missing_panics_doc)] pub mod decoder; + +#[cfg(feature = "ffmpeg")] +pub mod ffmpeg; #[cfg(feature = "vapoursynth")] pub mod vapoursynth; mod y4m; diff --git a/src/vapoursynth.rs b/src/vapoursynth.rs index 6a726d2..8ee934f 100644 --- a/src/vapoursynth.rs +++ b/src/vapoursynth.rs @@ -11,7 +11,7 @@ use vapoursynth::{ vsscript::{Environment, EvalFlags}, }; -use crate::y4m::VideoDetails; +use crate::decoder::VideoDetails; const OUTPUT_INDEX: i32 = 0; diff --git a/src/y4m.rs b/src/y4m.rs index 3ba8794..fb94c4e 100644 --- a/src/y4m.rs +++ 
b/src/y4m.rs @@ -2,6 +2,8 @@ use std::io::Read; use rav1e::prelude::{ChromaSamplePosition, ChromaSampling, Frame, Pixel, Rational}; +use crate::decoder::VideoDetails; + pub fn get_video_details(dec: &y4m::Decoder) -> VideoDetails { let width = dec.get_width(); let height = dec.get_height(); @@ -68,26 +70,3 @@ pub fn read_video_frame( }) .map_err(|e| e.into()) } - -#[derive(Debug, Clone, Copy)] -pub struct VideoDetails { - pub width: usize, - pub height: usize, - pub bit_depth: usize, - pub chroma_sampling: ChromaSampling, - pub chroma_sample_position: ChromaSamplePosition, - pub time_base: Rational, -} - -impl Default for VideoDetails { - fn default() -> Self { - VideoDetails { - width: 640, - height: 480, - bit_depth: 8, - chroma_sampling: ChromaSampling::Cs420, - chroma_sample_position: ChromaSamplePosition::Unknown, - time_base: Rational { num: 30, den: 1 }, - } - } -}