Skip to content

Commit

Permalink
Bugfix for sensuctl cluster health (#3469)
Browse files Browse the repository at this point in the history
Signed-off-by: Eric Chlebek <[email protected]>
  • Loading branch information
Simon Plourde authored Dec 13, 2019
1 parent 285e09a commit dcf1dbf
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
6 changes: 3 additions & 3 deletions backend/apid/routers/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ func (r *HealthRouter) health(w http.ResponseWriter, req *http.Request) {
}
ctx := req.Context()
if timeout > 0 {
tctx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel()
ctx = tctx
// We're storing the timeout as a value so it can be used by several
// contexts in GetClusterHealth, which is a concurrent gatherer.
ctx = context.WithValue(ctx, "timeout", time.Duration(timeout)*time.Second)
}
clusterHealth := r.controller.GetClusterHealth(ctx)
_ = json.NewEncoder(w).Encode(clusterHealth)
Expand Down
31 changes: 26 additions & 5 deletions backend/store/etcd/health_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,22 @@ func (s *Store) getHealth(ctx context.Context, id uint64, name string, urls []st
func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster, etcdClientTLSConfig *tls.Config) *corev2.HealthResponse {
healthResponse := &corev2.HealthResponse{}

var timeout time.Duration
if val := ctx.Value("timeout"); val != nil {
timeout, _ = val.(time.Duration)
}

// Do a get op against every cluster member. Collect the memberIDs and
// op errors into a response map, and return this map as etcd health
// information.
mList, err := cluster.MemberList(ctx)
tctx := ctx
if timeout > 0 {
var cancel context.CancelFunc
tctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}

mList, err := cluster.MemberList(tctx)
if err != nil {
logger.WithError(err).Error("could not get the cluster member list")
healthResponse.ClusterHealth = []*corev2.ClusterHealth{&corev2.ClusterHealth{
Expand All @@ -96,10 +108,13 @@ func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster,
for _, member := range mList.Members {
go func(id uint64, name string, urls []string) {
defer wg.Done()
select {
case healths <- s.getHealth(ctx, id, name, urls, etcdClientTLSConfig):
case <-ctx.Done():
tctx := ctx
if timeout > 0 {
var cancel context.CancelFunc
tctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}
healths <- s.getHealth(tctx, id, name, urls, etcdClientTLSConfig)
}(member.ID, member.Name, member.ClientURLs)
}

Expand All @@ -112,7 +127,13 @@ func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster,
return healthResponse.ClusterHealth[i].Name < healthResponse.ClusterHealth[j].Name
})

alarmResponse, err := s.client.Maintenance.AlarmList(ctx)
if timeout > 0 {
var cancel context.CancelFunc
tctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}

alarmResponse, err := s.client.Maintenance.AlarmList(tctx)
if err != nil {
logger.WithError(err).Error("failed to fetch etcd alarm list")
} else {
Expand Down
8 changes: 8 additions & 0 deletions backend/store/etcd/health_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"crypto/tls"
"testing"
"time"

"github.com/coreos/etcd/clientv3"
"github.com/sensu/sensu-go/backend/store"
Expand All @@ -18,3 +19,10 @@ func TestGetClusterHealth(t *testing.T) {
assert.Empty(t, healthResult.ClusterHealth[0].Err)
})
}

// TestGetClusterHealthTimeout checks that GetClusterHealth reports an error
// on its first cluster health entry when the context carries a one
// nanosecond duration under the "timeout" key.
func TestGetClusterHealthTimeout(t *testing.T) {
	testWithEtcdClient(t, func(s store.Store, client *clientv3.Client) {
		// The key has to remain the plain string "timeout": that is the key
		// GetClusterHealth reads back with ctx.Value.
		ctx := context.WithValue(context.Background(), "timeout", time.Nanosecond)

		result := s.GetClusterHealth(ctx, client.Cluster, (*tls.Config)(nil))

		// Fail cleanly instead of panicking on an out-of-range index if no
		// health entries come back.
		if len(result.ClusterHealth) == 0 {
			t.Fatal("expected at least one cluster health entry")
		}
		assert.NotEmpty(t, result.ClusterHealth[0].Err)
	})
}

0 comments on commit dcf1dbf

Please sign in to comment.