Skip to content

Commit

Permalink
feat: Parameterize pod restart count (#18)
Browse files Browse the repository at this point in the history
Co-authored-by: able8 <[email protected]>
  • Loading branch information
able8 and able8 authored Jan 3, 2023
1 parent e638848 commit a974572
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [1.2.0] - 2023-01-03
### Added
- Parameterize pod restart count

## [1.1.0] - 2022-09-19
### Added
- Support ignoring specific namespaces and pods
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ helm uninstall k8s-pod-restart-info-collector
| `slackUsername` | Slack username (Display on slack message) | default: `"k8s-pod-restart-info-collector"` |
| `slackChannel` | Slack channel name | default: `"restart-info-nonprod"` |
| `muteSeconds` | The time to mute duplicate pod alerts | default: `"600"`
| `ignoreRestartCount` | Restart count threshold: pods whose restart count is greater than this value are ignored | default: `"30"`
| `ignoredNamespaces` | A comma-separated list of namespaces to ignore | default: `""`
| `ignoredPodNamePrefixes` | A comma-separated list of pod name prefixes to ignore | default: `""`
| `slackWebhookUrl` | Slack webhook URL | required if slackWebhooUrlSecretKeyRef is not present |
Expand Down
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
TAG="v1.1.0"
TAG="v1.2.0"
docker buildx build --platform linux/amd64 -t devopsairwallex/k8s-pod-restart-info-collector:${TAG} .
docker push devopsairwallex/k8s-pod-restart-info-collector:${TAG}

Expand Down
9 changes: 4 additions & 5 deletions controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type Controller struct {
// NewController creates a new Controller.
func NewController(clientset kubernetes.Interface, slack Slack) *Controller {
const resyncPeriod = 0
ignoreRestartCount := getIgnoreRestartCount()

queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
informerFactory := informers.NewSharedInformerFactory(clientset, resyncPeriod)
Expand All @@ -61,12 +62,10 @@ func NewController(clientset kubernetes.Interface, slack Slack) *Controller {
return
}

klog.Infof("Update: %s/%s\n", newPod.Namespace, newPod.Name)

newPodRestartCount := getPodRestartCount(newPod)
// Ignore when restartCount > 30
if newPodRestartCount > 30 {
klog.Infof("Ignore: %s/%s restartCount: %d > 30\n", newPod.Namespace, newPod.Name, newPodRestartCount)
// Ignore when restartCount > ignoreRestartCount
if newPodRestartCount > ignoreRestartCount {
klog.Infof("Ignore: %s/%s restartCount: %d > %d\n", newPod.Namespace, newPod.Name, newPodRestartCount, ignoreRestartCount)
return
}

Expand Down
2 changes: 2 additions & 0 deletions helm/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ spec:
value: {{ .Values.slackUsername | quote}}
- name: MUTE_SECONDS
value: {{ .Values.muteSeconds | quote}}
- name: IGNORE_RESTART_COUNT
value: {{ .Values.ignoreRestartCount | quote}}
- name: IGNORED_NAMESPACES
value: {{ .Values.ignoredNamespaces | quote}}
- name: IGNORED_POD_NAME_PREFIXES
Expand Down
1 change: 1 addition & 0 deletions helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
slackChannel: "restart-info-nonprod"
slackUsername: "k8s-pod-restart-info-collector"
muteSeconds: 600
ignoreRestartCount: 30

# A comma-separated list of namespaces to ignore
ignoredNamespaces: ""
Expand Down
9 changes: 9 additions & 0 deletions helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ func isIgnoredPod(name string) bool {
return false
}

// getIgnoreRestartCount returns the restart-count threshold configured through
// the IGNORE_RESTART_COUNT environment variable.
//
// If the variable is unset or empty, or if its value cannot be parsed as an
// integer, a warning is logged and the default of 30 is returned. The two
// cases are reported separately so that a malformed value is not mistaken for
// a missing one.
func getIgnoreRestartCount() int {
	const defaultIgnoreRestartCount = 30

	raw, ok := os.LookupEnv("IGNORE_RESTART_COUNT")
	if !ok || raw == "" {
		klog.Warningf("Environment variable IGNORE_RESTART_COUNT is not set, default: %d\n", defaultIgnoreRestartCount)
		return defaultIgnoreRestartCount
	}

	ignoreRestartCount, err := strconv.Atoi(raw)
	if err != nil {
		// The variable is present but malformed: surface the offending value
		// and the parse error instead of claiming it is unset.
		klog.Warningf("Environment variable IGNORE_RESTART_COUNT has invalid value %q (%v), default: %d\n", raw, err, defaultIgnoreRestartCount)
		return defaultIgnoreRestartCount
	}
	return ignoreRestartCount
}

func printPod(pod *v1.Pod) (string, error) {
restarts := 0
totalContainers := len(pod.Spec.Containers)
Expand Down

0 comments on commit a974572

Please sign in to comment.