Commit
Queue images in memory instead of on the GPU when multiple requests come in.
xnorpx committed Jan 20, 2024
1 parent e12f359 commit f7f4618
Showing 3 changed files with 50 additions and 23 deletions.
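
The commit title captures the core idea: with Tokio's blocking pool capped at a single thread, concurrent detection requests wait in host memory and are handed to the GPU one at a time. Below is a minimal, self-contained sketch of that pattern, assuming only the tokio crate; fake_gpu_inference is a hypothetical stand-in for the real detector call, not code from this repository.

use std::time::Duration;

fn fake_gpu_inference(request: usize) -> String {
    // Stand-in for the real detector call; sleeps instead of touching a GPU.
    std::thread::sleep(Duration::from_millis(50));
    format!("request {request} done")
}

fn main() {
    let rt = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(1)       // request handling is light; one async worker is enough
        .max_blocking_threads(1) // one inference runs at a time; the rest queue in host memory
        .enable_all()
        .build()
        .expect("failed to build Tokio runtime");

    rt.block_on(async {
        // Four "requests" arrive at once, but only one blocking task executes
        // at any moment; the other three sit in the blocking-pool queue.
        let handles: Vec<_> = (0..4)
            .map(|i| tokio::task::spawn_blocking(move || fake_gpu_inference(i)))
            .collect();
        for handle in handles {
            println!("{}", handle.await.expect("inference task panicked"));
        }
    });
}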
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "blue-candle"
version = "0.5.0"
version = "0.6.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -19,10 +19,11 @@ candle-nn = "0"
clap = { version = "4", features = ["derive"] }
fast_image_resize = "*"
futures = "0"
intel-mkl-src = { version = "0", optional = true }
hf-hub = { version = "0", features = ["tokio"] }
image = "0"
imageproc = "0"
intel-mkl-src = { version = "0", optional = true }
num_cpus = "1"
reqwest = { version = "0", features = ["stream", "multipart", "json"] }
rusttype = "0"
serde = "1"
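
The new num_cpus dependency feeds the CPU-mode sizing of the blocking pool shown in the src/bin/blue_candle.rs diff below. Here is a small sketch of that sizing logic; the saturating guard for single-core machines is an assumption added for safety, not part of the commit, which uses num_cores - 1 directly.

fn cpu_blocking_threads(requested: Option<usize>) -> usize {
    let num_cores = num_cpus::get();
    // Leave one core free for the async worker, but never drop below one thread.
    let upper = num_cores.saturating_sub(1).max(1);
    requested.unwrap_or(upper).clamp(1, upper)
}

fn main() {
    println!("default: {}", cpu_blocking_threads(None));
    println!("requested 64: {}", cpu_blocking_threads(Some(64)));
}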
65 changes: 45 additions & 20 deletions src/bin/blue_candle.rs
@@ -101,23 +101,46 @@ pub struct Args {
/// Sets the level of logging
#[clap(short, long, value_enum, default_value_t = LogLevel::Info)]
log_level: LogLevel,

/// Max blocking threads; the maximum is the number of cores on the system
#[arg(long)]
pub blocking_threads: Option<usize>,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
fn main() -> anyhow::Result<()> {
setup_ansi_support();

// Run CPU inference on one core
env::set_var("RAYON_NUM_THREADS", "1");

let args = Args::parse();

let blocking_threads = if !args.cpu && cuda_is_available() {
// When running on the GPU we use only one blocking thread so that work
// queues in host memory and not in GPU memory. This avoids
// overloading the GPU memory.
1
} else {
// Run CPU inference on one core
env::set_var("RAYON_NUM_THREADS", "1");
let num_cores = num_cpus::get();
let blocking_threads = args.blocking_threads.unwrap_or(num_cores - 1);
blocking_threads.clamp(1, num_cores - 1)
};

// Configure Tokio
let rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1) // Number of requests will be low, so one worker thread is enough
.max_blocking_threads(blocking_threads) // Number of requests processed concurrently
.enable_all()
.build()?;

debug!("Tokio initilized with {blocking_threads} blocking threads.");

// Logging
let _guard = if let Some(log_path) = args.log_path.clone() {
println!(
"Starting Blue Candle, logging into: {}/blue_candle.log",
log_path
);
let file_appender = tracing_appender::rolling::never(&log_path, "blue_candle.log");
let file_appender = tracing_appender::rolling::daily(&log_path, "blue_candle.log");
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
tracing_subscriber::fmt()
.with_writer(non_blocking)
@@ -132,8 +155,6 @@ async fn main() -> anyhow::Result<()> {
None
};

ensure_directory_exists(args.image_path.clone()).await?;

let detector = Detector::new(
args.cpu,
args.model.clone(),
@@ -143,20 +164,24 @@
args.image_path.clone(),
)?;

if let Some(model_path) = args.model_path {
download_models(model_path).await?;
return Ok(());
}
if args.test {
return test(detector, args).await;
}
rt.block_on(async {
ensure_directory_exists(args.image_path.clone()).await?;

match args.image.clone() {
None => run_server(args, detector).await?,
Some(image) => test_image(image, args, detector).await?,
};
if let Some(model_path) = args.model_path {
download_models(model_path).await?;
return Ok(());
}
if args.test {
test(detector, args).await?;
return Ok(());
}

Ok(())
match args.image.clone() {
None => run_server(args, detector).await?,
Some(image) => test_image(image, args, detector).await?,
};
Ok(())
})
}

async fn run_server(args: Args, detector: Detector) -> anyhow::Result<()> {
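
One smaller change in this file: the log file appender moves from tracing_appender::rolling::never to daily rotation. A sketch of that setup in isolation, assuming the tracing, tracing-appender, and tracing-subscriber crates; the ./logs directory is a placeholder, not the crate's default path.

fn main() {
    // Rotates to a new file each day, e.g. ./logs/blue_candle.log.2024-01-20.
    let file_appender = tracing_appender::rolling::daily("./logs", "blue_candle.log");
    let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
    // Keep _guard alive for the program's lifetime so buffered lines flush on exit.
    tracing_subscriber::fmt().with_writer(non_blocking).init();
    tracing::info!("logging to ./logs/blue_candle.log with daily rotation");
}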
