From 37e4ab0af4c1201ed521660a10b7ef15c48d9832 Mon Sep 17 00:00:00 2001
From: GlennBullingham
Date: Sat, 22 Jan 2022 21:41:27 +0000
Subject: [PATCH 1/9] fix(helm chart): remove local pool config store

- Removes the -P argument from the mayastor container spec, as the use of a
  local pool config store has been deprecated and conflicts with the control
  plane's state store
- Removes the same from the pre-generated mayastor-daemonset definition in
  the /deploy folder, which is referenced in the user docs
---
 chart/templates/mayastor-daemonset.yaml | 1 -
 deploy/mayastor-daemonset.yaml          | 1 -
 2 files changed, 2 deletions(-)

diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml
index 3aa5a558c..cee13ff68 100644
--- a/chart/templates/mayastor-daemonset.yaml
+++ b/chart/templates/mayastor-daemonset.yaml
@@ -53,7 +53,6 @@ spec:
             - "-g$(MY_POD_IP)"
             - "-nnats"
             - "-y/var/local/mayastor/config.yaml"
-            - "-P/var/local/mayastor/pools.yaml"
             - "-l{{ include "mayastorCpuSpec" . }}"
             - "-pmayastor-etcd"
           command:
diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml
index 660432ea2..05f297811 100644
--- a/deploy/mayastor-daemonset.yaml
+++ b/deploy/mayastor-daemonset.yaml
@@ -54,7 +54,6 @@ spec:
         - "-N$(MY_NODE_NAME)"
         - "-g$(MY_POD_IP)"
         - "-nnats"
-        - "-P/var/local/mayastor/pools.yaml"
         - "-l1"
         - "-pmayastor-etcd"
       command:

From 4112646005a58db6ea783bbe3311e52e08fa153f Mon Sep 17 00:00:00 2001
From: Mikhail Tcymbaliuk
Date: Thu, 27 Jan 2022 00:59:04 +0100
Subject: [PATCH 2/9] fix(nexus): do not persist faulted state of the last
 replica

If the last remaining replica fails, it is no longer marked as unhealthy
in etcd. This allows the control plane to recover the nexus later without
ambiguity, as the replica with the most recent user data remains known.

Resolves: CAS-1284
---
 mayastor/src/bdev/nexus/nexus_bdev.rs        | 39 +++++++++++++++++++-
 mayastor/src/bdev/nexus/nexus_persistence.rs | 25 ++++++++++++-
 2 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/mayastor/src/bdev/nexus/nexus_bdev.rs b/mayastor/src/bdev/nexus/nexus_bdev.rs
index 5449e96cf..91e604baa 100644
--- a/mayastor/src/bdev/nexus/nexus_bdev.rs
+++ b/mayastor/src/bdev/nexus/nexus_bdev.rs
@@ -967,8 +967,43 @@ impl Nexus {
         // schedule the deletion of the child eventhough etcd has not been
         // updated yet we do not need to wait for that to
         // complete anyway.
-        MWQ.enqueue(Command::RemoveDevice(self.name.clone(), name));
-        self.persist(PersistOp::Update((uri.clone(), child.state())))
+        MWQ.enqueue(Command::RemoveDevice(self.name.clone(), name.clone()));
+
+        // Do not persist the child state if it is the last healthy child of
+        // the nexus: let the control plane reconstruct the nexus using this
+        // device as the replica with the most recent user data.
+        self.persist(PersistOp::UpdateCond(
+            (uri.clone(), child.state(), &|nexus_info| {
+                // Determine the number of healthy replicas in the persistent
+                // state and check against the last healthy replica remaining.
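+                // (The fold below is equivalent to
+                // nexus_info.children.iter().filter(|c| c.healthy).count(),
+                // i.e. the number of children currently marked healthy in
+                // the persisted NexusInfo.)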
+                let num_healthy = nexus_info.children.iter().fold(0, |n, c| {
+                    if c.healthy {
+                        n + 1
+                    } else {
+                        n
+                    }
+                });
+
+                match num_healthy {
+                    0 => {
+                        warn!(
+                            "nexus {}: no healthy replicas present in persistent store when retiring replica {}:
+                            not persisting the replica state",
+                            &name, &uri,
+                        );
+                        false
+                    }
+                    1 => {
+                        warn!(
+                            "nexus {}: retiring the last healthy replica {}, not persisting the replica state",
+                            &name, &uri,
+                        );
+                        false
+                    },
+                    _ => true,
+                }
+        })))
         .await;
     }
     self.resume().await
diff --git a/mayastor/src/bdev/nexus/nexus_persistence.rs b/mayastor/src/bdev/nexus/nexus_persistence.rs
index 4e71968fb..5c13d17d1 100644
--- a/mayastor/src/bdev/nexus/nexus_persistence.rs
+++ b/mayastor/src/bdev/nexus/nexus_persistence.rs
@@ -29,20 +29,25 @@ pub struct ChildInfo {
 }
 
 /// Defines the type of persist operations.
-pub(crate) enum PersistOp {
+pub(crate) enum PersistOp<'a> {
     /// Create a persistent entry.
     Create,
     /// Add a child to an existing persistent entry.
     AddChild((ChildUri, ChildState)),
     /// Update a persistent entry.
     Update((ChildUri, ChildState)),
+    /// Update a persistent entry only when a precondition on this NexusInfo
+    /// holds. The predicate is called under the protection of the NexusInfo
+    /// lock, so the check is atomic and cannot interfere with other
+    /// modifications of the same NexusInfo.
+    UpdateCond((ChildUri, ChildState, &'a dyn Fn(&NexusInfo) -> bool)),
     /// Save the clean shutdown variable.
     Shutdown,
 }
 
 impl Nexus {
     /// Persist information to the store.
-    pub(crate) async fn persist(&self, op: PersistOp) {
+    pub(crate) async fn persist(&self, op: PersistOp<'_>) {
         if !PersistentStore::enabled() {
             return;
         }
@@ -87,6 +92,22 @@ impl Nexus {
                     }
                 });
             }
+            // Only update the state of the child if the precondition holds.
+            PersistOp::UpdateCond((uri, state, f)) => {
+                // Do not persist the state if the predicate fails.
+                if !f(&nexus_info) {
+                    return;
+                }
+
+                let uuid =
+                    NexusChild::uuid(&uri).expect("Failed to get child UUID.");
+
+                nexus_info.children.iter_mut().for_each(|c| {
+                    if c.uuid == uuid {
+                        c.healthy = Self::child_healthy(&state);
+                    }
+                });
+            }
             PersistOp::Shutdown => {
                 // Only update the clean shutdown variable. Do not update the
                 // child state information.

From df7f038ffa3b823ee8cfb70b86e69b63c8e40bb1 Mon Sep 17 00:00:00 2001
From: Mikhail Tcymbaliuk
Date: Tue, 15 Feb 2022 00:27:18 +0100
Subject: [PATCH 3/9] fix(nexus): serialised nexus i/o suspend/resume

Suspend/resume operations on NVMe subsystems are now serialised per
nexus, which correctly handles simultaneous I/O suspend/resume requests
when multiple replicas are retired at the same time.
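A simplified sketch of the pause lifecycle being serialised here (the state
names mirror this patch; the transition function itself is hypothetical):

    #[derive(Clone, Copy, Debug)]
    enum NexusPauseState {
        Unpaused,
        Pausing,
        Paused,
        Unpausing,
    }

    /// What a suspend (pause) caller does in each state.
    fn on_suspend(s: NexusPauseState) -> Result<NexusPauseState, &'static str> {
        use NexusPauseState::*;
        match s {
            // The first pauser wins and drives the SPDK subsystem pause.
            Unpaused => Ok(Pausing),
            // Already paused: only a pause refcount is bumped.
            Paused => Ok(Paused),
            // A transition is in flight: park on a oneshot waiter, retry later.
            Pausing | Unpausing => Err("wait-and-retry"),
        }
    }

    fn main() {
        assert!(matches!(
            on_suspend(NexusPauseState::Unpaused),
            Ok(NexusPauseState::Pausing)
        ));
        assert!(on_suspend(NexusPauseState::Unpausing).is_err());
    }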
Resolves: CAS-1285
---
 mayastor/src/bdev/nexus/mod.rs                |   1 +
 mayastor/src/bdev/nexus/nexus_bdev.rs         | 116 +----------
 mayastor/src/bdev/nexus/nexus_io_subsystem.rs | 182 ++++++++++++++++++
 3 files changed, 192 insertions(+), 107 deletions(-)
 create mode 100755 mayastor/src/bdev/nexus/nexus_io_subsystem.rs

diff --git a/mayastor/src/bdev/nexus/mod.rs b/mayastor/src/bdev/nexus/mod.rs
index e0a7af0bd..9bc2634c9 100644
--- a/mayastor/src/bdev/nexus/mod.rs
+++ b/mayastor/src/bdev/nexus/mod.rs
@@ -28,6 +28,7 @@ mod nexus_channel;
 pub(crate) mod nexus_child;
 pub mod nexus_fn_table;
 pub mod nexus_io;
+pub mod nexus_io_subsystem;
 pub mod nexus_label;
 pub mod nexus_metadata;
 pub mod nexus_module;
diff --git a/mayastor/src/bdev/nexus/nexus_bdev.rs b/mayastor/src/bdev/nexus/nexus_bdev.rs
index 91e604baa..4355243b4 100644
--- a/mayastor/src/bdev/nexus/nexus_bdev.rs
+++ b/mayastor/src/bdev/nexus/nexus_bdev.rs
@@ -11,7 +11,6 @@ use std::{
     ptr::NonNull,
 };
 
-use crossbeam::atomic::AtomicCell;
 use futures::channel::oneshot;
 use nix::errno::Errno;
 use serde::Serialize;
@@ -35,6 +34,7 @@ use crate::{
             ReconfigureCtx,
         },
         nexus_child::{ChildError, ChildState, NexusChild},
+        nexus_io_subsystem::NexusIoSubsystem,
         nexus_label::LabelError,
         nexus_nbd::{NbdDisk, NbdError},
         nexus_persistence::{NexusInfo, PersistOp},
@@ -44,7 +44,6 @@ use crate::{
         Bdev,
         Command,
         CoreError,
-        Cores,
         IoDevice,
         IoType,
         Protocol,
@@ -434,11 +433,10 @@ pub struct Nexus {
     pub nexus_target: Option<NexusTarget>,
     /// Nexus I/O device.
     pub io_device: Option<IoDevice>,
-    /// Nexus pause counter to allow concurrent pause/resume.
-    pause_state: AtomicCell<NexusPauseState>,
-    pause_waiters: Vec<oneshot::Sender<i32>>,
     /// information saved to a persistent store
     pub nexus_info: futures::lock::Mutex<NexusInfo>,
+    /// Nexus I/O subsystem.
+    io_subsystem: NexusIoSubsystem,
 }
 
 unsafe impl core::marker::Sync for Nexus {}
@@ -546,6 +544,10 @@ impl Nexus {
             child_count: 0,
             children: Vec::new(),
             bdev: Bdev::from(&*b as *const _ as *mut spdk_bdev),
+            io_subsystem: NexusIoSubsystem::new(
+                name.to_string(),
+                Bdev::from(&*b as *const _ as *mut spdk_bdev),
+            ),
             state: parking_lot::Mutex::new(NexusState::Init),
             bdev_raw: Box::into_raw(b),
             data_ent_offset: 0,
@@ -554,8 +556,6 @@ impl Nexus {
             nexus_target: None,
             nvme_params,
             io_device: None,
-            pause_state: AtomicCell::new(NexusPauseState::Unpaused),
-            pause_waiters: Vec::new(),
             nexus_info: futures::lock::Mutex::new(Default::default()),
             nexus_uuid: Default::default(),
         });
@@ -785,52 +785,7 @@ impl Nexus {
     /// Note: in order to handle concurrent resumes properly, this function must
     /// be called only from the master core.
     pub async fn resume(&mut self) -> Result<(), Error> {
-        assert_eq!(Cores::current(), Cores::first());
-
-        // In case nexus is already unpaused or is being paused, bail out.
-        if matches!(
-            self.pause_state.load(),
-            NexusPauseState::Pausing | NexusPauseState::Unpaused
-        ) {
-            return Ok(());
-        }
-
-        info!(
-            "{} resuming nexus, waiters: {}",
-            self.name,
-            self.pause_waiters.len(),
-        );
-
-        if let Some(Protocol::Nvmf) = self.shared() {
-            if self.pause_waiters.is_empty() {
-                if let Some(subsystem) = NvmfSubsystem::nqn_lookup(&self.name) {
-                    self.pause_state.store(NexusPauseState::Unpausing);
-                    subsystem.resume().await.unwrap();
-                    // The trickiest case: a new waiter appeared during nexus
-                    // unpausing. By the agreement we keep
-                    // nexus paused for the waiters, so pause
-                    // the nexus to restore status quo.
-                    if !self.pause_waiters.is_empty() {
-                        info!(
-                            "{} concurrent nexus pausing requested during unpausing, re-pausing",
-                            self.name,
-                        );
-                        subsystem.pause().await.unwrap();
-                        self.pause_state.store(NexusPauseState::Paused);
-                    }
-                }
-            }
-        }
-
-        // Keep the Nexus paused in case there are waiters.
-        if !self.pause_waiters.is_empty() {
-            let s = self.pause_waiters.pop().unwrap();
-            s.send(0).expect("Nexus pause waiter disappeared");
-        } else {
-            self.pause_state.store(NexusPauseState::Unpaused);
-        }
-
-        Ok(())
+        self.io_subsystem.resume().await
     }
 
     /// Suspend any incoming IO to the bdev pausing the controller allows us to
     /// handle internal events and which is a protocol feature.
     /// In case concurrent pause requests take place, the other callers
     /// will wait till the nexus is resumed and will continue execution
     /// with the nexus paused once they are awakened via resume().
     /// Note: in order to handle concurrent pauses properly, this function must
     /// be called only from the master core.
     pub async fn pause(&mut self) -> Result<(), Error> {
-        assert_eq!(Cores::current(), Cores::first());
-
-        let state = self.pause_state.compare_exchange(
-            NexusPauseState::Unpaused,
-            NexusPauseState::Pausing,
-        );
-
-        match state {
-            // Pause nexus if it is in the unpaused state.
-            Ok(NexusPauseState::Unpaused) => {
-                if let Some(Protocol::Nvmf) = self.shared() {
-                    if let Some(subsystem) =
-                        NvmfSubsystem::nqn_lookup(&self.name)
-                    {
-                        info!(
-                            "{} pausing subsystem {}",
-                            self.name,
-                            subsystem.get_nqn()
-                        );
-                        subsystem.pause().await.unwrap();
-                        info!(
-                            "{} subsystem {} paused",
-                            self.name,
-                            subsystem.get_nqn()
-                        );
-                    }
-                }
-                // the fist pause will win
-                self.pause_state
-                    .compare_exchange(
-                        NexusPauseState::Pausing,
-                        NexusPauseState::Paused,
-                    )
-                    .unwrap();
-            }
-
-            Err(NexusPauseState::Pausing) | Err(NexusPauseState::Paused) => {
-                // we are already pausing or paused
-                return Ok(());
-            }
-
-            // we must pause again, schedule pause operation
-            Err(NexusPauseState::Unpausing) => {
-                return Err(Error::Pause {
-                    state: NexusPauseState::Unpausing,
-                    name: self.name.clone(),
-                });
-            }
-            _ => {
-                panic!("Corrupted nexus state");
-            }
-        }
-
-        Ok(())
+        self.io_subsystem.suspend().await
     }
 
     // Abort all active I/O for target child and set I/O fail-fast flag
diff --git a/mayastor/src/bdev/nexus/nexus_io_subsystem.rs b/mayastor/src/bdev/nexus/nexus_io_subsystem.rs
new file mode 100755
index 000000000..7cd271896
--- /dev/null
+++ b/mayastor/src/bdev/nexus/nexus_io_subsystem.rs
@@ -0,0 +1,182 @@
+use std::{
+    collections::VecDeque,
+    sync::atomic::{AtomicU32, Ordering},
+};
+
+use crossbeam::atomic::AtomicCell;
+use futures::channel::oneshot;
+
+use crate::{
+    bdev::nexus::nexus_bdev::Error as NexusError,
+    core::{Bdev, Cores, Protocol, Share},
+    subsys::NvmfSubsystem,
+};
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum NexusPauseState {
+    Unpaused,
+    Pausing,
+    Paused,
+    Unpausing,
+}
+
+/// Abstraction for managing pausing/unpausing I/O on an NVMe subsystem,
+/// allowing concurrent pause/resume calls by serializing the low-level SPDK
+/// calls.
+#[derive(Debug)]
+pub struct NexusIoSubsystem {
+    name: String,
+    bdev: Bdev,
+    pause_state: AtomicCell<NexusPauseState>,
+    pause_waiters: VecDeque<oneshot::Sender<i32>>,
+    pause_cnt: AtomicU32,
+}
+
+impl NexusIoSubsystem {
+    /// Create a new instance of Nexus I/O subsystem for a given nexus name
+    /// and block device.
+    pub fn new(name: String, bdev: Bdev) -> Self {
+        Self {
+            pause_state: AtomicCell::new(NexusPauseState::Unpaused),
+            pause_waiters: VecDeque::with_capacity(8), /* Default number of
+                                                        * replicas */
+            pause_cnt: AtomicU32::new(0),
+            name,
+            bdev,
+        }
+    }
+
+    /// Suspend any incoming IO to the bdev. Pausing the controller allows us
+    /// to handle internal events; this is a protocol feature.
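+    /// (While the NVMe-oF subsystem is paused no new I/O is admitted, which
+    /// is what lets the nexus reconfigure its children safely.)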
+    /// In case concurrent pause requests take place, the other callers
+    /// will wait till the nexus is resumed and will continue execution
+    /// with the nexus paused once they are awakened via resume().
+    /// Note: in order to handle concurrent pauses properly, this function
+    /// must be called only from the master core.
+    pub async fn suspend(&mut self) -> Result<(), NexusError> {
+        assert_eq!(Cores::current(), Cores::first());
+
+        trace!(?self.name, "pausing nexus I/O");
+
+        loop {
+            let state = self.pause_state.compare_exchange(
+                NexusPauseState::Unpaused,
+                NexusPauseState::Pausing,
+            );
+
+            match state {
+                Ok(NexusPauseState::Unpaused) => {
+                    // Pause subsystem. The only acceptable counter transition
+                    // is: 0 -> 1.
+                    assert_eq!(
+                        self.pause_cnt.fetch_add(1, Ordering::SeqCst),
+                        0,
+                        "Corrupted subsystem pause counter"
+                    );
+
+                    if let Some(Protocol::Nvmf) = self.bdev.shared() {
+                        if let Some(subsystem) =
+                            NvmfSubsystem::nqn_lookup(&self.name)
+                        {
+                            trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "pausing subsystem");
+                            subsystem.pause().await.unwrap();
+                            trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "subsystem paused");
+                        }
+                    }
+
+                    // Mark the subsystem as Paused once the pause call has
+                    // completed.
+                    self.pause_state
+                        .compare_exchange(
+                            NexusPauseState::Pausing,
+                            NexusPauseState::Paused,
+                        )
+                        .expect("Failed to mark subsystem as Paused");
+                    break;
+                }
+                // Subsystem is already paused, increment the pause count.
+                Err(NexusPauseState::Paused) => {
+                    trace!(nexus=%self.name, "nexus is already paused, incrementing refcount");
+                    self.pause_cnt.fetch_add(1, Ordering::SeqCst);
+                    break;
+                }
+                // Wait till the subsystem has completed its transition and
+                // retry the operation.
+                Err(NexusPauseState::Unpausing)
+                | Err(NexusPauseState::Pausing) => {
+                    trace!(nexus=%self.name, "nexus is in intermediate state, deferring Pause operation");
+                    let (s, r) = oneshot::channel::<i32>();
+                    self.pause_waiters.push_back(s);
+                    r.await.unwrap();
+                    trace!(nexus=%self.name, "nexus completed state transition, retrying Pause operation");
+                }
+                _ => {
+                    panic!("Corrupted I/O subsystem state");
+                }
+            };
+        }
+
+        // Resume one waiter in case there are any.
+        if !self.pause_waiters.is_empty() {
+            let w = self.pause_waiters.pop_front().unwrap();
+            trace!(nexus=%self.name, "resuming the first Pause waiter");
+            w.send(0).expect("I/O subsystem pause waiter disappeared");
+        }
+
+        trace!(?self.name, "nexus I/O paused");
+        Ok(())
+    }
+
+    /// Resume IO to the bdev.
+    /// Note: in order to handle concurrent resumes properly, this function
+    /// must be called only from the master core.
+    pub async fn resume(&mut self) -> Result<(), NexusError> {
+        assert_eq!(Cores::current(), Cores::first());
+
+        trace!(?self.name, "resuming nexus I/O");
+
+        loop {
+            match self.pause_state.load() {
+                // Already unpaused, bail out.
+                NexusPauseState::Unpaused => {
+                    break;
+                }
+                // Simultaneous pausing/unpausing: wait till the subsystem has
+                // completed its transition and retry the operation.
+                NexusPauseState::Pausing | NexusPauseState::Unpausing => {
+                    trace!(?self.name, "nexus is in intermediate state, deferring Resume operation");
+                    let (s, r) = oneshot::channel::<i32>();
+                    self.pause_waiters.push_back(s);
+                    r.await.unwrap();
+                    trace!(?self.name, "completed state transition, retrying Resume operation");
+                }
+                // Unpause the subsystem, taking into account the overall
+                // number of pauses.
+                NexusPauseState::Paused => {
+                    let v = self.pause_cnt.fetch_sub(1, Ordering::SeqCst);
+                    // If this was the last outstanding pause, resume the
+                    // subsystem.
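+                    // (fetch_sub returns the counter value *before* the
+                    // decrement, so v == 1 means this call released the last
+                    // pause.)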
+                    if v == 1 {
+                        if let Some(subsystem) =
+                            NvmfSubsystem::nqn_lookup(&self.name)
+                        {
+                            self.pause_state.store(NexusPauseState::Unpausing);
+                            trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "resuming subsystem");
+                            subsystem.resume().await.unwrap();
+                            trace!(nexus=%self.name, nqn=%subsystem.get_nqn(), "subsystem resumed");
+                        }
+                        self.pause_state.store(NexusPauseState::Unpaused);
+                    }
+                    break;
+                }
+            }
+        }
+
+        // Resume one waiter in case there are any.
+        if !self.pause_waiters.is_empty() {
+            trace!(nexus=%self.name, "resuming the first Resume waiter");
+            let w = self.pause_waiters.pop_front().unwrap();
+            w.send(0).expect("I/O subsystem resume waiter disappeared");
+        }
+
+        trace!(?self.name, "nexus I/O resumed");
+        Ok(())
+    }
+}

From f0ed436a729908c4f9724d7668d033200ef41524 Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Mon, 14 Feb 2022 17:58:16 +0000
Subject: [PATCH 4/9] chore: add env variable to control max number of qpairs

Added NVMF_TCP_MAX_QPAIRS_PER_CTRL (which includes the admin queue) to
control the max number of qpairs per controller.

Why may this be useful? For example, if mayastor is configured with 2
cores but the hosts have 32 cores, this may create an unbalanced
configuration, as mayastor will use only 2 I/O qpairs to the replicas.
This is an initial approach that allows a user to easily configure how
many qpairs the initiator host may use.
---
 mayastor/src/bdev/nvmx/channel.rs  | 2 +-
 mayastor/src/subsys/config/opts.rs | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/mayastor/src/bdev/nvmx/channel.rs b/mayastor/src/bdev/nvmx/channel.rs
index aa9b9410e..5108284d0 100644
--- a/mayastor/src/bdev/nvmx/channel.rs
+++ b/mayastor/src/bdev/nvmx/channel.rs
@@ -650,7 +650,7 @@ impl NvmeControllerIoChannel {
 
 impl Drop for NvmeControllerIoChannel {
     fn drop(&mut self) {
-        debug!("I/O channel {:p} dropped", self.0.as_ptr());
+        trace!("I/O channel {:p} dropped", self.0.as_ptr());
         unsafe { spdk_put_io_channel(self.0.as_ptr()) }
     }
 }
diff --git a/mayastor/src/subsys/config/opts.rs b/mayastor/src/subsys/config/opts.rs
index 06df1f44c..7861d9de4 100644
--- a/mayastor/src/subsys/config/opts.rs
+++ b/mayastor/src/subsys/config/opts.rs
@@ -181,7 +181,10 @@ impl Default for NvmfTcpTransportOpts {
             in_capsule_data_size: 4096,
             max_io_size: 131_072,
             io_unit_size: 131_072,
-            max_qpairs_per_ctrl: 32,
+            max_qpairs_per_ctrl: try_from_env(
+                "NVMF_TCP_MAX_QPAIRS_PER_CTRL",
+                32,
+            ),
             num_shared_buf: try_from_env("NVMF_TCP_NUM_SHARED_BUF", 2048),
             buf_cache_size: try_from_env("NVMF_TCP_BUF_CACHE_SIZE", 64),
             dif_insert_or_strip: false,

From 2e7d86c9c80d90119c14de1fff6c3c7421183129 Mon Sep 17 00:00:00 2001
From: Paul Yoong
Date: Wed, 16 Feb 2022 11:44:52 +0000
Subject: [PATCH 5/9] chore(version numbers): increment all to 1.0.0

Ensure all version numbers have been updated.
--- Cargo.lock | 20 ++++++++++---------- composer/Cargo.toml | 2 +- csi/Cargo.toml | 6 +++--- devinfo/Cargo.toml | 2 +- jsonrpc/Cargo.toml | 2 +- mayastor/Cargo.toml | 4 ++-- mbus-api/Cargo.toml | 2 +- nvmeadm/Cargo.toml | 2 +- rpc/Cargo.toml | 2 +- spdk-sys/Cargo.toml | 2 +- sysfs/Cargo.toml | 2 +- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9d5e65d13..c467a0be0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -419,7 +419,7 @@ dependencies = [ [[package]] name = "composer" -version = "0.1.0" +version = "1.0.0" dependencies = [ "bollard", "futures", @@ -551,7 +551,7 @@ dependencies = [ [[package]] name = "csi" -version = "0.2.0" +version = "1.0.0" dependencies = [ "async-stream", "async-trait", @@ -709,7 +709,7 @@ dependencies = [ [[package]] name = "devinfo" -version = "0.1.0" +version = "1.0.0" dependencies = [ "bindgen 0.59.1", "snafu", @@ -1413,7 +1413,7 @@ checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd" [[package]] name = "jsonrpc" -version = "0.1.0" +version = "1.0.0" dependencies = [ "nix", "serde", @@ -1514,7 +1514,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "mayastor" -version = "0.9.0" +version = "1.0.0" dependencies = [ "ansi_term 0.12.1", "assert_matches", @@ -1583,7 +1583,7 @@ dependencies = [ [[package]] name = "mbus_api" -version = "0.1.0" +version = "1.0.0" dependencies = [ "async-trait", "composer", @@ -1812,7 +1812,7 @@ dependencies = [ [[package]] name = "nvmeadm" -version = "0.1.0" +version = "1.0.0" dependencies = [ "derive_builder", "enum-primitive-derive", @@ -2260,7 +2260,7 @@ dependencies = [ [[package]] name = "rpc" -version = "0.1.0" +version = "1.0.0" dependencies = [ "bytes", "prost", @@ -2568,7 +2568,7 @@ dependencies = [ [[package]] name = "spdk-sys" -version = "0.1.0" +version = "1.0.0" dependencies = [ "bindgen 0.59.1", "cc", @@ -2696,7 +2696,7 @@ dependencies = [ [[package]] name = "sysfs" -version = "0.1.0" +version = "1.0.0" [[package]] name = "tap" diff --git a/composer/Cargo.toml b/composer/Cargo.toml index dd353222f..3b1b233b1 100644 --- a/composer/Cargo.toml +++ b/composer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "composer" -version = "0.1.0" +version = "1.0.0" authors = ["Tiago Castro "] edition = "2018" diff --git a/csi/Cargo.toml b/csi/Cargo.toml index 62224b5a8..3e5ac3e37 100644 --- a/csi/Cargo.toml +++ b/csi/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Jan Kryl ", "Jeffry Molanus "] edition = "2018" diff --git a/jsonrpc/Cargo.toml b/jsonrpc/Cargo.toml index 5c5c03b97..e8d792143 100644 --- a/jsonrpc/Cargo.toml +++ b/jsonrpc/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Jeffry Molanus "] edition = "2018" name = "jsonrpc" -version = "0.1.0" +version = "1.0.0" [dependencies] nix = "0.22.1" diff --git a/mayastor/Cargo.toml b/mayastor/Cargo.toml index 5f72d8ff2..51103a12c 100644 --- a/mayastor/Cargo.toml +++ b/mayastor/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Jeffry Molanus "] edition = "2018" name = "mayastor" -version = "0.9.0" +version = "1.0.0" default-run = "mayastor-client" [[bin]] @@ -113,5 +113,5 @@ version = "0.8.2" [dev-dependencies] assert_matches = "1.5.0" composer = { path = "../composer" } -nvmeadm = {path = "../nvmeadm", version = "0.1.0"} +nvmeadm = { path = "../nvmeadm" } run_script = "0.8.0" diff --git a/mbus-api/Cargo.toml b/mbus-api/Cargo.toml index 65232cbda..41321fbc0 100644 --- a/mbus-api/Cargo.toml +++ b/mbus-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mbus_api" -version = "0.1.0" +version = 
"1.0.0" authors = ["Tiago Castro "] edition = "2018" diff --git a/nvmeadm/Cargo.toml b/nvmeadm/Cargo.toml index 830f1c0d3..bfe50da0d 100644 --- a/nvmeadm/Cargo.toml +++ b/nvmeadm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nvmeadm" -version = "0.1.0" +version = "1.0.0" authors = ["Jeffry Molanus "] edition = "2018" diff --git a/rpc/Cargo.toml b/rpc/Cargo.toml index ff7abb44e..bbae952e2 100644 --- a/rpc/Cargo.toml +++ b/rpc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rpc" -version = "0.1.0" +version = "1.0.0" authors = ["Jeffry Molanus "] edition = "2018" diff --git a/spdk-sys/Cargo.toml b/spdk-sys/Cargo.toml index 3206a1191..4de5fe36f 100644 --- a/spdk-sys/Cargo.toml +++ b/spdk-sys/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "spdk-sys" description = "Rust bindings for SPDK library" -version = "0.1.0" +version = "1.0.0" edition = "2018" links = "spdk" build = "build.rs" diff --git a/sysfs/Cargo.toml b/sysfs/Cargo.toml index 7a80afbdf..1e3fba76f 100644 --- a/sysfs/Cargo.toml +++ b/sysfs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sysfs" -version = "0.1.0" +version = "1.0.0" authors = ["Jeffry Molanus "] edition = "2018" From 89524a4d92298889c7b810be977a92fcefec05a7 Mon Sep 17 00:00:00 2001 From: Tiago Castro Date: Wed, 16 Feb 2022 15:49:39 +0000 Subject: [PATCH 6/9] chore: update chart to include nvmf env variables --- chart/templates/mayastor-daemonset.yaml | 5 +++++ deploy/csi-daemonset.yaml | 2 +- deploy/mayastor-daemonset.yaml | 21 +++++++++++++-------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml index cee13ff68..c111c1171 100644 --- a/chart/templates/mayastor-daemonset.yaml +++ b/chart/templates/mayastor-daemonset.yaml @@ -36,6 +36,11 @@ spec: env: - name: RUST_LOG value: info,mayastor={{ .Values.mayastorLogLevel }} + - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL + # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1. + value: {{ add .Values.mayastorCpuCount 1 }} + - name: NVMF_TCP_MAX_QUEUE_DEPTH + value: 32 - name: MY_NODE_NAME valueFrom: fieldRef: diff --git a/deploy/csi-daemonset.yaml b/deploy/csi-daemonset.yaml index 6a52fe3a9..5707cb75c 100644 --- a/deploy/csi-daemonset.yaml +++ b/deploy/csi-daemonset.yaml @@ -30,7 +30,7 @@ spec: # the same. containers: - name: mayastor-csi - image: mayadata/mayastor:v1.0.0 + image: mayadata/mayastor:v1.0.1 imagePullPolicy: IfNotPresent # we need privileged because we mount filesystems and use mknod securityContext: diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml index 05f297811..ae8281bb6 100644 --- a/deploy/mayastor-daemonset.yaml +++ b/deploy/mayastor-daemonset.yaml @@ -33,11 +33,16 @@ spec: command: ['sh', '-c', 'until nc -vz nats 4222; do echo "Waiting for message bus..."; sleep 1; done;'] containers: - name: mayastor - image: mayadata/mayastor:v1.0.0 + image: mayadata/mayastor:v1.0.1 imagePullPolicy: IfNotPresent env: - name: RUST_LOG value: info,mayastor=info + - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL + # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1. + value: 2 + - name: NVMF_TCP_MAX_QUEUE_DEPTH + value: 32 - name: MY_NODE_NAME valueFrom: fieldRef: @@ -50,11 +55,12 @@ spec: # The -l argument accepts cpu-list. Indexing starts at zero. # For example -l 1,2,10-20 means use core 1, 2, 10 to 20. # Note: Ensure that the CPU resources are updated accordingly. 
- # If you use 2 CPUs, the CPU: field should also read 2. + # If you use 2 CPUs, the CPU: field should also read 2. - "-N$(MY_NODE_NAME)" - "-g$(MY_POD_IP)" - "-nnats" - - "-l1" + - "-y/var/local/mayastor/config.yaml" + - "-l0" - "-pmayastor-etcd" command: - mayastor @@ -73,15 +79,14 @@ spec: # NOTE: Each container must have mem/cpu limits defined in order to # belong to Guaranteed QoS class, hence can never get evicted in case of # pressure unless they exceed those limits. limits and requests must be the same. - # limits: cpu: "1" - memory: "1Gi" - hugepages-2Mi: "2Gi" + memory: "512Mi" + hugepages-2Mi: "1Gi" requests: cpu: "1" - memory: "1Gi" - hugepages-2Mi: "2Gi" + memory: "512Mi" + hugepages-2Mi: "1Gi" ports: - containerPort: 10124 protocol: TCP From 4fabf05f1da7543010b5afb60029ac0b9ef673cd Mon Sep 17 00:00:00 2001 From: Tiago Castro Date: Wed, 16 Feb 2022 18:15:12 +0000 Subject: [PATCH 7/9] chore: update chart to reflect previous deploy configuration --- chart/templates/_helpers.tpl | 2 +- chart/templates/mayastor-daemonset.yaml | 12 ++++++------ deploy/mayastor-daemonset.yaml | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/chart/templates/_helpers.tpl b/chart/templates/_helpers.tpl index 34c605d1e..767a57d11 100644 --- a/chart/templates/_helpers.tpl +++ b/chart/templates/_helpers.tpl @@ -13,6 +13,6 @@ {{- if gt $i 0 }} {{- printf "," }} {{- end }} -{{- printf "%d" $i }} +{{- printf "%d" (add $i 1) }} {{- end }} {{- end }} diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml index c111c1171..f4d84db89 100644 --- a/chart/templates/mayastor-daemonset.yaml +++ b/chart/templates/mayastor-daemonset.yaml @@ -38,9 +38,9 @@ spec: value: info,mayastor={{ .Values.mayastorLogLevel }} - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1. - value: {{ add .Values.mayastorCpuCount 1 }} + value: "{{ add .Values.mayastorCpuCount 1 }}" - name: NVMF_TCP_MAX_QUEUE_DEPTH - value: 32 + value: "32" - name: MY_NODE_NAME valueFrom: fieldRef: @@ -79,12 +79,12 @@ spec: # pressure unless they exceed those limits. limits and requests must be the same. limits: cpu: "{{ .Values.mayastorCpuCount }}" - memory: "512Mi" - hugepages-2Mi: "{{ .Values.mayastorHugePagesGiB }}Gi" + memory: "1Gi" + hugepages-2Mi: "{{ add .Values.mayastorHugePagesGiB 1 }}Gi" requests: cpu: "{{ .Values.mayastorCpuCount }}" - memory: "512Mi" - hugepages-2Mi: "{{ .Values.mayastorHugePagesGiB }}Gi" + memory: "1Gi" + hugepages-2Mi: "{{ add .Values.mayastorHugePagesGiB 1 }}Gi" ports: - containerPort: 10124 protocol: TCP diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml index ae8281bb6..c26bd4262 100644 --- a/deploy/mayastor-daemonset.yaml +++ b/deploy/mayastor-daemonset.yaml @@ -40,9 +40,9 @@ spec: value: info,mayastor=info - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1. - value: 2 + value: "2" - name: NVMF_TCP_MAX_QUEUE_DEPTH - value: 32 + value: "32" - name: MY_NODE_NAME valueFrom: fieldRef: @@ -60,7 +60,7 @@ spec: - "-g$(MY_POD_IP)" - "-nnats" - "-y/var/local/mayastor/config.yaml" - - "-l0" + - "-l1" - "-pmayastor-etcd" command: - mayastor @@ -81,12 +81,12 @@ spec: # pressure unless they exceed those limits. limits and requests must be the same. 
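         # (requests matching limits on every resource is what places the pod
         # in the Guaranteed QoS class.)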
       limits:
         cpu: "1"
-        memory: "512Mi"
-        hugepages-2Mi: "1Gi"
+        memory: "1Gi"
+        hugepages-2Mi: "2Gi"
       requests:
         cpu: "1"
-        memory: "512Mi"
-        hugepages-2Mi: "1Gi"
+        memory: "1Gi"
+        hugepages-2Mi: "2Gi"
       ports:
       - containerPort: 10124
         protocol: TCP

From df9752542a51ae515567020b66f1bba8315fa009 Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Thu, 17 Feb 2022 09:57:28 +0000
Subject: [PATCH 8/9] chore: update charts to add 2Gi as a minimum hugepages

Rather than adding 1Gi extra, just set 2Gi as the minimum.
---
 chart/templates/mayastor-daemonset.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml
index f4d84db89..a946727ae 100644
--- a/chart/templates/mayastor-daemonset.yaml
+++ b/chart/templates/mayastor-daemonset.yaml
@@ -80,11 +80,11 @@ spec:
           limits:
             cpu: "{{ .Values.mayastorCpuCount }}"
             memory: "1Gi"
-            hugepages-2Mi: "{{ add .Values.mayastorHugePagesGiB 1 }}Gi"
+            hugepages-2Mi: "{{ max .Values.mayastorHugePagesGiB 2 }}Gi"
           requests:
             cpu: "{{ .Values.mayastorCpuCount }}"
             memory: "1Gi"
-            hugepages-2Mi: "{{ add .Values.mayastorHugePagesGiB 1 }}Gi"
+            hugepages-2Mi: "{{ max .Values.mayastorHugePagesGiB 2 }}Gi"
           ports:
             - containerPort: 10124
               protocol: TCP

From e493695df671b9b94c23cf7f657fb412b7c9cd6f Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Fri, 18 Feb 2022 11:28:27 +0000
Subject: [PATCH 9/9] chore: don't add default max qpairs per controller

Limiting the qpairs to a low value means we cannot then allocate more
for the rebuilds.
---
 chart/templates/mayastor-daemonset.yaml | 3 ---
 deploy/mayastor-daemonset.yaml          | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/chart/templates/mayastor-daemonset.yaml b/chart/templates/mayastor-daemonset.yaml
index a946727ae..b5e7222e4 100644
--- a/chart/templates/mayastor-daemonset.yaml
+++ b/chart/templates/mayastor-daemonset.yaml
@@ -36,9 +36,6 @@ spec:
         env:
           - name: RUST_LOG
             value: info,mayastor={{ .Values.mayastorLogLevel }}
-          - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL
-            # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1.
-            value: "{{ add .Values.mayastorCpuCount 1 }}"
           - name: NVMF_TCP_MAX_QUEUE_DEPTH
diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml
index c26bd4262..5c9e1adee 100644
--- a/deploy/mayastor-daemonset.yaml
+++ b/deploy/mayastor-daemonset.yaml
@@ -38,9 +38,6 @@ spec:
       env:
         - name: RUST_LOG
           value: info,mayastor=info
-        - name: NVMF_TCP_MAX_QPAIRS_PER_CTRL
-          # Current recommendation is to set this value to be the number of cores provided to mayastor (see -l argument) plus 1.
-          value: "2"
         - name: NVMF_TCP_MAX_QUEUE_DEPTH
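
A note on the NVMF_TCP_* settings touched above: patch 4 reads
NVMF_TCP_MAX_QPAIRS_PER_CTRL through mayastor's try_from_env helper, falling
back to a built-in default when the variable is unset (the state this series
ends in). A minimal sketch of how such an env override can work, as an
approximation only (the real helper's parsing and error handling may differ):

    use std::{env, str::FromStr};

    // Hypothetical stand-in for mayastor's try_from_env: read the variable,
    // parse it, and fall back to the given default otherwise.
    fn try_from_env<T: FromStr>(name: &str, default: T) -> T {
        env::var(name)
            .ok()
            .and_then(|v| v.parse::<T>().ok())
            .unwrap_or(default)
    }

    fn main() {
        // With NVMF_TCP_MAX_QPAIRS_PER_CTRL unset, the default (32) applies.
        let max_qpairs: u32 = try_from_env("NVMF_TCP_MAX_QPAIRS_PER_CTRL", 32);
        println!("max_qpairs_per_ctrl = {}", max_qpairs);
    }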