Skip to content

Commit

Permalink
Maintain thread affinity during non-TKO failovers
Browse files Browse the repository at this point in the history
Summary:
When failing over, make an educated guess as to whether thread affinity still applies to the failover tier. This is done by inspecting the failover result code. If thread affinity is likely to be broken, `SRRoute` will attempt to jump threads to restore affinity.

TKO errors should not affect thread affinity, as `traverse()` is aware of TKO hosts and should be able to predict a failover in that case:
```
FailoverRoute
  |-> traverse(): failover1
  `-> route()
       |-> primary:   TKO
       `-> failover1: SUCCESS  // <- thread affinity
```
```
FailoverRoute
  |-> traverse(): failover2
  `-> route()
       |-> primary:   TKO
       |-> failover1: TKO
       `-> failover2: SUCCESS  // <- thread affinity
```
A non-TKO error, on the other hand, will break thread affinity. For instance, during S459689:
```
FailoverRoute
  |-> traverse(): failover2
  `-> route()
       |-> primary:   TKO
       |-> failover1: BUSY  // <- original thread affinity
       `-> failover2: SUCCESS  // jump threads
```

Reviewed By: disylh, stuclar

Differential Revision: D65231378

fbshipit-source-id: be9f40aa08eb20d07374064ad99c09a63158b653
  • Loading branch information
Emanuele Altieri authored and facebook-github-bot committed Nov 11, 2024
1 parent cbf97f7 commit 85facb9
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 3 deletions.
2 changes: 2 additions & 0 deletions mcrouter/CarbonRouterClient-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ CarbonRouterClient<RouterInfo>::makeProxyRequestContext(
});

proxyRequestContext->setRequester(self_);
proxyRequestContext->setThreadAffinity(
mode_ == ThreadMode::AffinitizedRemoteThread);
return proxyRequestContext;
}

Expand Down
6 changes: 3 additions & 3 deletions mcrouter/CarbonRouterClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ bool srHostInfoPtrFuncCarbonRouterClient(
const HostInfoPtr& host,
const RequestClass& requestClass,
uint64_t& hash) {
if (!requestClass.is(RequestClass::kShadow) && host &&
host->location().getTWTaskID()) {
hash = *host->location().getTWTaskID();
if (!requestClass.is(RequestClass::kShadow) && host) {
// Host unique key is derived from IP and port
hash = host->location().hostUniqueKey();
return true;
}
return false;
Expand Down
4 changes: 4 additions & 0 deletions mcrouter/CarbonRouterInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ class CarbonRouterInstance
cpuStatsWorker_.reset();
}

/**
 * Maps an arbitrary hash value onto one of this instance's proxies.
 *
 * The hash is reduced modulo the number of proxies, so equal hashes always
 * resolve to the same proxy.
 *
 * NOTE(review): assumes proxies_ is non-empty; an empty container would make
 * the modulo below undefined -- confirm callers only use this after the
 * proxies have been created.
 *
 * @param hash  Value used to pick a proxy.
 * @return Reference to the proxy stored at index (hash % proxies_.size()).
 */
Proxy<RouterInfo>& getProxyFromHash(size_t hash) {
  const size_t proxyIndex = hash % proxies_.size();
  return *proxies_[proxyIndex];
}

CarbonRouterInstance(const CarbonRouterInstance&) = delete;
CarbonRouterInstance& operator=(const CarbonRouterInstance&) = delete;
CarbonRouterInstance(CarbonRouterInstance&&) noexcept = delete;
Expand Down
17 changes: 17 additions & 0 deletions mcrouter/McrouterFiberContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class fiber_local {
std::bitset<NUM_FLAGS> featureFlags;
int32_t selectedIndex{-1};
uint32_t failoverCount{0};
bool jumpThreads{false};
std::optional<uint64_t> bucketId;
std::optional<std::string> distributionTargetRegion;
RequestClass requestClass;
Expand Down Expand Up @@ -216,6 +217,22 @@ class fiber_local {
return folly::fibers::local<McrouterFiberContext>().failoverCount;
}

/**
 * Marks the current fiber's context so that the request may be moved to a
 * different event base after failing over. The flag is stored in the
 * fiber-local McrouterFiberContext and is never cleared by this function.
 */
static void enableJumpThreads() {
  auto& fiberCtx = folly::fibers::local<McrouterFiberContext>();
  fiberCtx.jumpThreads = true;
}

/**
 * Reports whether the current fiber's context has been flagged for a move
 * to a different event base as a consequence of failing over.
 *
 * @return The jumpThreads flag held in the fiber-local McrouterFiberContext.
 */
static bool shouldJumpThreads() {
  const auto& fiberCtx = folly::fibers::local<McrouterFiberContext>();
  return fiberCtx.jumpThreads;
}

/**
* Accumulate latency injected before request for current fiber and return the
* new value
Expand Down
9 changes: 9 additions & 0 deletions mcrouter/ProxyRequestContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,21 @@ class ProxyRequestContext {
reqContextScopeGuard_.reset();
}

void setThreadAffinity(bool enabled) {
threadAffinity_ = enabled;
}

bool isThreadAffinityEnabled() const {
return threadAffinity_;
}

protected:
// Keep on first cacheline. Used by ProxyRequestContextTyped
const void* ptr_{nullptr};
carbon::Result finalResult_{carbon::Result::UNKNOWN};
int32_t poolStatIndex_{-1};
bool replied_{false};
bool threadAffinity_{false};

ProxyRequestContext(
ProxyBase& pr,
Expand Down
10 changes: 10 additions & 0 deletions mcrouter/routes/FailoverRoute.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,16 @@ class FailoverRoute {
}
}

// Check whether we may be on the wrong thread for the failover host.
//
// Thread affinity is based on the destination host discovered by
// traverse(). TKO states are taken into account by traverse(), so TKO
// errors should not affect thread affinity.
//
if (*reply.result_ref() != carbon::Result::TKO) {
fiber_local<RouterInfo>::enableJumpThreads();
}

return false;
}

Expand Down

0 comments on commit 85facb9

Please sign in to comment.