From df43b77b9899cdfb3dd27a88748f43ed9616edef Mon Sep 17 00:00:00 2001 From: Jeffry Molanus Date: Wed, 16 Sep 2020 19:49:16 +0200 Subject: [PATCH] nexus: flush should always succeed During testing and performance benchmarking, it showed that when we create an lvol, it does not support flush. The bdev then is exported as a target -- over nvmf and consumed as replica. By virtue of it being a nvme bdev, it implicitly supports flush, which our lvol does not handle. We must support flush and thus, simply complete the IO. While here, i've removed some of the raw pointer deferences. --- mayastor/src/bdev/nexus/nexus_bdev.rs | 72 +++++-------- mayastor/src/bdev/nexus/nexus_fn_table.rs | 110 ++++++++++---------- mayastor/src/bdev/nexus/nexus_io.rs | 117 +++++++++++----------- 3 files changed, 134 insertions(+), 165 deletions(-) diff --git a/mayastor/src/bdev/nexus/nexus_bdev.rs b/mayastor/src/bdev/nexus/nexus_bdev.rs index e3148ba16..285d56369 100644 --- a/mayastor/src/bdev/nexus/nexus_bdev.rs +++ b/mayastor/src/bdev/nexus/nexus_bdev.rs @@ -19,7 +19,6 @@ use tonic::{Code, Status}; use spdk_sys::{ spdk_bdev, spdk_bdev_desc, - spdk_bdev_flush_blocks, spdk_bdev_io, spdk_bdev_io_get_buf, spdk_bdev_nvme_admin_passthru, @@ -640,22 +639,23 @@ impl Nexus { success: bool, parent_io: *mut c_void, ) { - let mut pio = Bio(parent_io as *mut _); + let mut pio = Bio::from(parent_io); + let mut chio = Bio::from(child_io); // if any child IO has failed record this within the io context if !success { trace!( "child IO {:?} ({}) of parent {:?} failed", - Bio(child_io), - (*child_io).type_, + chio, + chio.io_type(), pio ); pio.ctx_as_mut_ref().status = io_status::FAILED; } - pio.assess(child_io, success); + pio.assess(&mut chio, success); // always free the child IO - Bio::io_free(child_io); + chio.free(); } /// callback when the IO has buffer associated with itself @@ -665,7 +665,7 @@ impl Nexus { success: bool, ) { if !success { - let bio = Bio(io); + let bio = Bio::from(io); let nexus = bio.nexus_as_ref(); warn!("{}: Failed to get io buffer for io {:?}", nexus.name, bio); } @@ -674,7 +674,7 @@ impl Nexus { let (desc, ch) = ch.ch[ch.previous].io_tuple(); let ret = Self::readv_impl(io, desc, ch); if ret != 0 { - let bio = Bio(io); + let bio = Bio::from(io); let nexus = bio.nexus_as_ref(); error!("{}: Failed to submit IO {:?}", nexus.name, bio); } @@ -691,7 +691,7 @@ impl Nexus { if io.need_buf() { unsafe { spdk_bdev_io_get_buf( - io.0, + io.as_ptr(), Some(Self::nexus_get_buf_cb), io.num_blocks() * io.block_len(), ) @@ -701,13 +701,13 @@ impl Nexus { let (desc, ch) = channels.ch[child].io_tuple(); - let ret = Self::readv_impl(io.0, desc, ch); + let ret = Self::readv_impl(io.as_ptr(), desc, ch); if ret != 0 { error!( "{}: Failed to submit dispatched IO {:p}", io.nexus_as_ref().name, - io.0 + io.as_ptr() ); io.fail(); @@ -721,7 +721,7 @@ impl Nexus { desc: *mut spdk_bdev_desc, ch: *mut spdk_io_channel, ) -> i32 { - let io = Bio(pio); + let io = Bio::from(pio); let nexus = io.nexus_as_ref(); unsafe { spdk_bdev_readv_blocks( @@ -732,7 +732,7 @@ impl Nexus { io.offset() + nexus.data_ent_offset, io.num_blocks(), Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) } } @@ -750,7 +750,7 @@ impl Nexus { bdev, chan, Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) }) .collect::>(); @@ -760,7 +760,7 @@ impl Nexus { error!( "{}: Failed to submit dispatched IO {:?}", io.nexus_as_ref().name, - io.0 + io.as_ptr(), ); } } @@ -781,7 +781,7 @@ impl Nexus { io.offset() + io.nexus_as_ref().data_ent_offset, io.num_blocks(), Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) }) .collect::>(); @@ -791,7 +791,7 @@ impl Nexus { error!( "{}: Failed to submit dispatched IO {:?}", io.nexus_as_ref().name, - io.0 + io.as_ptr() ); } } @@ -808,7 +808,7 @@ impl Nexus { io.offset() + io.nexus_as_ref().data_ent_offset, io.num_blocks(), Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) }) .collect::>(); @@ -817,33 +817,7 @@ impl Nexus { error!( "{}: Failed to submit dispatched IO {:?}", io.nexus_as_ref().name, - io.0 - ); - } - } - - pub(crate) fn flush(&self, io: &Bio, channels: &NexusChannelInner) { - let results = channels - .ch - .iter() - .map(|c| unsafe { - let (b, c) = c.io_tuple(); - spdk_bdev_flush_blocks( - b, - c, - io.offset() + io.nexus_as_ref().data_ent_offset, - io.num_blocks(), - Some(Self::io_completion), - io.0 as *mut _, - ) - }) - .collect::>(); - - if results.iter().any(|r| *r != 0) { - error!( - "{}: Failed to submit dispatched IO {:?}", - io.nexus_as_ref().name, - io.0 + io.as_ptr() ); } } @@ -860,7 +834,7 @@ impl Nexus { io.offset() + io.nexus_as_ref().data_ent_offset, io.num_blocks(), Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) }) .collect::>(); @@ -869,7 +843,7 @@ impl Nexus { error!( "{}: Failed to submit dispatched IO {:?}", io.nexus_as_ref().name, - io.0 + io.as_ptr() ); } } @@ -893,7 +867,7 @@ impl Nexus { io.nvme_buf(), io.nvme_nbytes(), Some(Self::io_completion), - io.0 as *mut _, + io.as_ptr() as *mut _, ) }) .collect::>(); @@ -902,7 +876,7 @@ impl Nexus { error!( "{}: Failed to submit dispatched IO {:?}", io.nexus_as_ref().name, - io.0 + io.as_ptr() ); } } diff --git a/mayastor/src/bdev/nexus/nexus_fn_table.rs b/mayastor/src/bdev/nexus/nexus_fn_table.rs index 3847b2c6b..ac12913fc 100644 --- a/mayastor/src/bdev/nexus/nexus_fn_table.rs +++ b/mayastor/src/bdev/nexus/nexus_fn_table.rs @@ -96,71 +96,67 @@ impl NexusFnTable { io: *mut spdk_bdev_io, ) { // only set the number of IO attempts before the first attempt - Bio::init(io, channel, Bio(io).nexus_as_ref().max_io_attempts); - Self::io_submit_or_resubmit(channel, io); + let mut bio = Bio::from(io); + bio.init(); + Self::io_submit_or_resubmit(channel, &mut bio); } /// Submit an IO to the children at the first or subsequent attempts. - pub fn io_submit_or_resubmit( + pub(crate) fn io_submit_or_resubmit( channel: *mut spdk_io_channel, - io: *mut spdk_bdev_io, + nio: &mut Bio, ) { - if let Some(io_type) = Bio::io_type(io) { - let mut ch = NexusChannel::inner_from_channel(channel); - - // set the fields that need to be (re)set per-attempt - match io_type { - io_type::READ => Bio::reset(io, 1), - _ => Bio::reset(io, ch.ch.len() as i8), - }; - - let nio = Bio(io); - let nexus = nio.nexus_as_ref(); - - match io_type { - io_type::READ => nexus.readv(&nio, &mut ch), - io_type::WRITE => nexus.writev(&nio, &ch), - io_type::RESET => { - trace!("{}: Dispatching RESET", nexus.bdev.name()); - nexus.reset(&nio, &ch) - } - io_type::UNMAP => { - if nexus.io_is_supported(io_type) { - nexus.unmap(&nio, &ch) - } else { - nio.fail(); - } - } - io_type::FLUSH => { - if nexus.io_is_supported(io_type) { - nexus.flush(&nio, &ch) - } else { - nio.fail() - } + let mut ch = NexusChannel::inner_from_channel(channel); + + // set the fields that need to be (re)set per-attempt + if nio.io_type() == io_type::READ { + nio.reset(1); + } else { + nio.reset(ch.ch.len()) + } + + let nexus = nio.nexus_as_ref(); + let io_type = nio.io_type(); + match io_type { + io_type::READ => nexus.readv(&nio, &mut ch), + io_type::WRITE => nexus.writev(&nio, &ch), + io_type::RESET => { + trace!("{}: Dispatching RESET", nexus.bdev.name()); + nexus.reset(&nio, &ch) + } + io_type::UNMAP => { + if nexus.io_is_supported(io_type) { + nexus.unmap(&nio, &ch) + } else { + nio.fail(); } - io_type::WRITE_ZEROES => { - if nexus.io_is_supported(io_type) { - nexus.write_zeroes(&nio, &ch) - } else { - nio.fail() - } + } + io_type::FLUSH => { + // our replica's are attached to as nvme controllers + // who always support flush. This can be troublesome + // so we complete the IO directly. + nio.reset(0); + nio.ok(); + } + io_type::WRITE_ZEROES => { + if nexus.io_is_supported(io_type) { + nexus.write_zeroes(&nio, &ch) + } else { + nio.fail() } - io_type::NVME_ADMIN => { - if nexus.io_is_supported(io_type) { - nexus.nvme_admin(&nio, &ch) - } else { - nio.fail() - } + } + io_type::NVME_ADMIN => { + if nexus.io_is_supported(io_type) { + nexus.nvme_admin(&nio, &ch) + } else { + nio.fail() } - _ => panic!( - "{} Received unsupported IO! type {}", - nexus.name, io_type - ), - }; - } else { - // something is very wrong ... - error!("Received unknown IO type {}", unsafe { (*io).type_ }); - } + } + _ => panic!( + "{} Received unsupported IO! type {}", + nexus.name, io_type + ), + }; } /// called per core to create IO channels per Nexus instance diff --git a/mayastor/src/bdev/nexus/nexus_io.rs b/mayastor/src/bdev/nexus/nexus_io.rs index 238efa041..7e5381e2c 100644 --- a/mayastor/src/bdev/nexus/nexus_io.rs +++ b/mayastor/src/bdev/nexus/nexus_io.rs @@ -7,6 +7,7 @@ use spdk_sys::{ spdk_bdev_free_io, spdk_bdev_io, spdk_bdev_io_complete, + spdk_bdev_io_get_io_channel, spdk_io_channel, }; @@ -17,6 +18,7 @@ use crate::{ }, core::Bdev, }; +use std::ptr::NonNull; /// NioCtx provides context on a per IO basis #[derive(Debug, Clone)] @@ -27,8 +29,6 @@ pub struct NioCtx { pub(crate) status: i32, /// attempts left pub(crate) io_attempts: i32, - /// pointer to the inner channels - pub(crate) nexus_channel: *mut spdk_io_channel, } /// BIO is a wrapper to provides a "less unsafe" wrappers around raw @@ -52,7 +52,20 @@ pub struct NioCtx { /// /// 2. The IO pointers are never accessed from any other thread /// and care must be taken that you never pass an IO ptr to another core -pub(crate) struct Bio(pub *mut spdk_bdev_io); +#[derive(Clone)] +pub(crate) struct Bio(NonNull); + +impl From<*mut c_void> for Bio { + fn from(io: *mut c_void) -> Self { + Bio(NonNull::new(io as *mut spdk_bdev_io).unwrap()) + } +} + +impl From<*mut spdk_bdev_io> for Bio { + fn from(io: *mut spdk_bdev_io) -> Self { + Bio(NonNull::new(io as *mut spdk_bdev_io).unwrap()) + } +} /// redefinition of IO types to make them (a) shorter and (b) get rid of the /// enum conversion bloat. @@ -96,25 +109,22 @@ pub mod nvme_admin_opc { impl Bio { /// obtain tbe Bdev this IO is associated with pub(crate) fn bdev_as_ref(&self) -> Bdev { - unsafe { Bdev::from((*self.0).bdev) } + unsafe { Bdev::from(self.0.as_ref().bdev) } + } + + pub(crate) fn io_channel(&self) -> *mut spdk_io_channel { + unsafe { spdk_bdev_io_get_io_channel(self.0.as_ptr()) } } /// initialize the ctx fields of an spdk_bdev_io - pub fn init( - io: *mut spdk_bdev_io, - nexus_channel: *mut spdk_io_channel, - io_attempts: i32, - ) { - let mut bio = Bio(io); - bio.ctx_as_mut_ref().io_attempts = io_attempts; - bio.ctx_as_mut_ref().nexus_channel = nexus_channel; + pub fn init(&mut self) { + self.ctx_as_mut_ref().io_attempts = self.nexus_as_ref().max_io_attempts; } /// reset the ctx fields of an spdk_bdev_io to submit or resubmit an IO - pub fn reset(io: *mut spdk_bdev_io, in_flight: i8) { - let mut bio = Bio(io); - bio.ctx_as_mut_ref().in_flight = in_flight; - bio.ctx_as_mut_ref().status = io_status::SUCCESS; + pub fn reset(&mut self, in_flight: usize) { + self.ctx_as_mut_ref().in_flight = in_flight as i8; + self.ctx_as_mut_ref().status = io_status::SUCCESS; } /// complete an IO for the nexus. In the IO completion routine in @@ -134,42 +144,34 @@ impl Bio { } } unsafe { - spdk_bdev_io_complete(self.0, io_status::SUCCESS); + spdk_bdev_io_complete(self.0.as_ptr(), io_status::SUCCESS); } } /// mark the IO as failed #[inline] pub(crate) fn fail(&self) { unsafe { - spdk_bdev_io_complete(self.0, io_status::FAILED); + spdk_bdev_io_complete(self.0.as_ptr(), io_status::FAILED); } } /// assess the IO if we need to mark it failed or ok. #[inline] - pub(crate) fn assess( - &mut self, - child_io: *const spdk_bdev_io, - success: bool, - ) { + pub(crate) fn assess(&mut self, child_io: &mut Bio, success: bool) { self.ctx_as_mut_ref().in_flight -= 1; debug_assert!(self.ctx_as_mut_ref().in_flight >= 0); - if !success && !child_io.is_null() { - let io_type = Bio::io_type(self.0).unwrap(); + if !success { let io_offset = self.offset(); let io_num_blocks = self.num_blocks(); - - unsafe { - self.nexus_as_ref().error_record_add( - (*child_io).bdev, - io_type, - io_status::FAILED, - io_offset, - io_num_blocks, - ); - } + self.nexus_as_ref().error_record_add( + child_io.bdev_as_ref().as_ptr(), + self.io_type(), + io_status::FAILED, + io_offset, + io_num_blocks, + ); } if self.ctx_as_mut_ref().in_flight == 0 { @@ -177,8 +179,8 @@ impl Bio { self.ctx_as_mut_ref().io_attempts -= 1; if self.ctx_as_mut_ref().io_attempts > 0 { NexusFnTable::io_submit_or_resubmit( - self.ctx_as_mut_ref().nexus_channel, - self.0, + self.io_channel(), + &mut self.clone(), ); } else { self.fail(); @@ -201,74 +203,67 @@ impl Bio { #[inline] pub(crate) fn ctx_as_mut_ref(&mut self) -> &mut NioCtx { unsafe { - &mut *((*self.0).driver_ctx.as_mut_ptr() as *const c_void - as *mut NioCtx) + &mut *(self.0.as_mut().driver_ctx.as_mut_ptr() as *mut NioCtx) } } /// get a raw pointer to the base of the iov #[inline] pub(crate) fn iovs(&self) -> *mut spdk_sys::iovec { - unsafe { (*self.0).u.bdev.iovs } + unsafe { self.0.as_ref().u.bdev.iovs } } /// number of iovs that are part of this IO #[inline] pub(crate) fn iov_count(&self) -> i32 { - unsafe { (*self.0).u.bdev.iovcnt } + unsafe { self.0.as_ref().u.bdev.iovcnt } } /// offset where we do the IO on the device #[inline] pub(crate) fn offset(&self) -> u64 { - unsafe { (*self.0).u.bdev.offset_blocks } + unsafe { self.0.as_ref().u.bdev.offset_blocks } } /// num of blocks this IO will read/write/unmap #[inline] pub(crate) fn num_blocks(&self) -> u64 { - unsafe { (*self.0).u.bdev.num_blocks } + unsafe { self.0.as_ref().u.bdev.num_blocks } } /// NVMe passthru command #[inline] pub(crate) fn nvme_cmd(&self) -> spdk_sys::spdk_nvme_cmd { - unsafe { (*self.0).u.nvme_passthru.cmd } + unsafe { self.0.as_ref().u.nvme_passthru.cmd } } /// raw pointer to NVMe passthru data buffer #[inline] pub(crate) fn nvme_buf(&self) -> *mut c_void { - unsafe { (*self.0).u.nvme_passthru.buf } + unsafe { self.0.as_ref().u.nvme_passthru.buf as *mut _ } } /// NVMe passthru number of bytes to transfer #[inline] pub(crate) fn nvme_nbytes(&self) -> u64 { - unsafe { (*self.0).u.nvme_passthru.nbytes } + unsafe { self.0.as_ref().u.nvme_passthru.nbytes } } - /// free the io directly without completion note that the IO is not freed - /// but rather put back into the mempool, which is allocated during startup - #[inline] - pub(crate) fn io_free(io: *mut spdk_bdev_io) { - unsafe { spdk_bdev_free_io(io) } + /// free the IO + pub(crate) fn free(&self) { + unsafe { spdk_bdev_free_io(self.0.as_ptr()) } } /// determine the type of this IO #[inline] - pub(crate) fn io_type(io: *mut spdk_bdev_io) -> Option { - if io.is_null() { - trace!("io is null!!"); - return None; - } - Some(unsafe { (*io).type_ } as u32) + pub(crate) fn io_type(&self) -> u32 { + unsafe { self.0.as_ref().type_ as u32 } } /// get the block length of this IO #[inline] pub(crate) fn block_len(&self) -> u64 { - unsafe { u64::from((*(*self.0).bdev).blocklen) } + self.bdev_as_ref().block_len() as u64 } /// determine if the IO needs an indirect buffer this can happen for example @@ -284,6 +279,10 @@ impl Bio { slice[0].iov_base.is_null() } } + + pub(crate) fn as_ptr(&self) -> *mut spdk_bdev_io { + self.0.as_ptr() + } } impl Debug for Bio { @@ -294,7 +293,7 @@ impl Debug for Bio { self.bdev_as_ref().name(), self.offset(), self.num_blocks(), - Bio::io_type(self.0).unwrap(), + self.io_type(), self ) }