Skip to content

Commit

Permalink
chore: unify connection timeout config
Browse files Browse the repository at this point in the history
  • Loading branch information
tclxyxj25245 committed Nov 15, 2024
1 parent 19ea612 commit 3425690
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 11 deletions.
6 changes: 3 additions & 3 deletions kvrocks.conf
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,19 @@ slave-read-only yes
# By default the priority is 100.
slave-priority 100

# Change the default timeout in milliseconds for socket connect during fullsync.
# Change the default timeout in milliseconds for socket connect during replication.
# The default value is 3100, and 0 means no timeout.
#
# If the master is unreachable, connecting without a timeout may block future
# 'clusterx setnodes' commands because the replication thread is blocked on connect.
slave-fullsync-connect-timeout-ms 3100
replication-connect-timeout-ms 3100

# Change the default timeout in milliseconds for socket recv during fullsync.
# The default value is 3200, and 0 means no timeout.
#
# If the master is unreachable when fetching SST files, not having a timeout may block
# future 'clusterx setnodes' commands because the replication thread is blocked on recv.
slave-fullsync-recv-timeout-ms 3200
replication-recv-timeout-ms 3200

# TCP listen() backlog.
#
Expand Down
7 changes: 3 additions & 4 deletions src/cluster/replication.cc
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,14 @@ void ReplicationThread::CallbacksStateMachine::Start() {
}

uint64_t last_connect_timestamp = 0;
int connect_timeout_ms = 3100;

while (!repl_->stop_flag_ && bev == nullptr) {
if (util::GetTimeStampMS() - last_connect_timestamp < 1000) {
// prevent frequent re-connect when the master is down with the connection refused error
sleep(1);
}
last_connect_timestamp = util::GetTimeStampMS();
auto cfd = util::SockConnect(repl_->host_, repl_->port_, connect_timeout_ms);
auto cfd = util::SockConnect(repl_->host_, repl_->port_, repl_->srv_->GetConfig()->replication_connect_timeout_ms);
if (!cfd) {
LOG(ERROR) << "[replication] Failed to connect the master, err: " << cfd.Msg();
continue;
Expand Down Expand Up @@ -778,8 +777,8 @@ Status ReplicationThread::parallelFetchFile(const std::string &dir,
auto exit = MakeScopeExit([ssl] { SSL_free(ssl); });
#endif
int sock_fd = GET_OR_RET(util::SockConnect(this->host_, this->port_, ssl,
this->srv_->GetConfig()->slave_fullsync_connect_timeout_ms,
this->srv_->GetConfig()->slave_fullsync_recv_timeout_ms)
this->srv_->GetConfig()->replication_connect_timeout_ms,
this->srv_->GetConfig()->replication_recv_timeout_ms)
.Prefixed("connect the server err"));
#ifdef ENABLE_OPENSSL
exit.Disable();
Expand Down
4 changes: 2 additions & 2 deletions src/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ Config::Config() {
{"slave-empty-db-before-fullsync", false, new YesNoField(&slave_empty_db_before_fullsync, false)},
{"slave-priority", false, new IntField(&slave_priority, 100, 0, INT_MAX)},
{"slave-read-only", false, new YesNoField(&slave_readonly, true)},
{"slave-fullsync-connect-timeout-ms", false, new IntField(&slave_fullsync_connect_timeout_ms, 3100, 0, INT_MAX)},
{"slave-fullsync-recv-timeout-ms", false, new IntField(&slave_fullsync_recv_timeout_ms, 3200, 0, INT_MAX)},
{"replication-connect-timeout-ms", false, new IntField(&replication_connect_timeout_ms, 3100, 0, INT_MAX)},
{"replication-recv-timeout-ms", false, new IntField(&replication_recv_timeout_ms, 3200, 0, INT_MAX)},
{"use-rsid-psync", true, new YesNoField(&use_rsid_psync, false)},
{"profiling-sample-ratio", false, new IntField(&profiling_sample_ratio, 0, 0, 100)},
{"profiling-sample-record-max-len", false, new IntField(&profiling_sample_record_max_len, 256, 0, INT_MAX)},
Expand Down
4 changes: 2 additions & 2 deletions src/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ struct Config {
bool slave_readonly = true;
bool slave_serve_stale_data = true;
bool slave_empty_db_before_fullsync = false;
int slave_fullsync_connect_timeout_ms = 3200;
int slave_fullsync_recv_timeout_ms = 3100;
int slave_priority = 100;
// Timeout in milliseconds for socket connect during replication; 0 means no timeout.
// Kept equal to the 3100 default registered for "replication-connect-timeout-ms" in config.cc.
int replication_connect_timeout_ms = 3100;
// Timeout in milliseconds for socket recv during fullsync; 0 means no timeout.
// Kept equal to the 3200 default registered for "replication-recv-timeout-ms" in config.cc.
int replication_recv_timeout_ms = 3200;
int max_db_size = 0;
int max_replication_mb = 0;
int max_io_mb = 0;
Expand Down

0 comments on commit 3425690

Please sign in to comment.