feat: Add a rds-instance-stop chaos fault

Signed-off-by: Jongwoo Han <[email protected]>
litmuschaos · Sep 6, 2024 · 09a8112 · 09a8112
1 parent 160f11c
commit 09a8112
Show file tree

Hide file tree

Showing 7 changed files with 281 additions and 0 deletions.
diff --git a/faults/aws/aws.chartserviceversion.yaml b/faults/aws/aws.chartserviceversion.yaml
@@ -34,6 +34,9 @@ spec:
     - name: ebs-loss-by-tag
       description: It detach the EBS volume from AWS EC2 instance identified by Tag for a certain chaos duration.
       displayName: "EBS Loss By Tag"
+    - name: rds-instance-stop
+      description: It stops the RDS instance identified by instance identifier.
+      displayName: "RDS Instance Stop"
   keywords:
     - AWS
   maintainers:

diff --git a/faults/aws/aws.package.yaml b/faults/aws/aws.package.yaml
@@ -22,3 +22,6 @@ faults:
   - name: ebs-loss-by-tag
     CSV: ebs-loss-by-tag.chartserviceversion.yaml
     desc: "ebs-loss-by-tag"
+  - name: rds-instance-stop
+    CSV: rds-instance-stop.chartserviceversion.yaml
+    desc: "rds-instance-stop"
diff --git a/faults/aws/experiments.yaml b/faults/aws/experiments.yaml
@@ -779,3 +779,107 @@ spec:
         mountPath: /tmp/
 
 ---
+---
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChaosExperiment
+description:
+  message: |
+    Stopping an RDS instance identified by instance identifier.
+metadata:
+  name: rds-instance-stop
+  labels:
+    name: rds-instance-stop
+    app.kubernetes.io/part-of: litmus
+    app.kubernetes.io/component: chaosexperiment
+    app.kubernetes.io/version: ci
+spec:
+  definition:
+    scope: Cluster
+    permissions:
+      # Pods: create, monitor and clean up the experiment and helper pods
+      - apiGroups: [""]
+        resources: ["pods"]
+        verbs:
+          - create
+          - delete
+          - get
+          - list
+          - patch
+          - update
+          - deletecollection
+      # Events: create and update the events of the chaosengine and chaosresult
+      - apiGroups: [""]
+        resources: ["events"]
+        verbs: ["create", "get", "list", "patch", "update"]
+      # Secrets and configmaps: read them so they can be mounted into the experiment pod (if specified)
+      - apiGroups: [""]
+        resources: ["secrets", "configmaps"]
+        verbs: ["get", "list"]
+      # Pod logs: track the logs of the runner, experiment and helper pods
+      - apiGroups: [""]
+        resources: ["pods/log"]
+        verbs: ["get", "list", "watch"]
+      # Pod exec: execute commands inside the target container
+      - apiGroups: [""]
+        resources: ["pods/exec"]
+        verbs: ["get", "list", "create"]
+      # Jobs: let the chaos-runner pod configure and monitor the experiment job
+      - apiGroups: ["batch"]
+        resources: ["jobs"]
+        verbs: ["create", "list", "get", "delete", "deletecollection"]
+      # Litmus resources: create, poll and delete the chaos resources used within a chaos workflow
+      - apiGroups: ["litmuschaos.io"]
+        resources: ["chaosengines", "chaosexperiments", "chaosresults"]
+        verbs: ["create", "list", "get", "patch", "update", "delete"]
+      # Nodes: read access for the experiment's node status checks
+      - apiGroups: [""]
+        resources: ["nodes"]
+        verbs: ["get", "list"]
+    image: "litmuschaos.docker.scarf.sh/litmuschaos/go-runner:latest"
+    imagePullPolicy: Always
+    command:
+      - /bin/bash
+    args:
+      - -c
+      - ./experiments -name rds-instance-stop
+    env:
+      # Chaos duration in sec
+      - name: TOTAL_CHAOS_DURATION
+        value: "30"
+
+      # Interval duration in sec
+      - name: CHAOS_INTERVAL
+        value: "30"
+
+      # Wait period (in sec) before and after chaos injection
+      - name: RAMP_TIME
+        value: ""
+
+      # Identifier of the target RDS instance
+      - name: RDS_INSTANCE_IDENTIFIER
+        value: ""
+
+      - name: REGION
+        value: ""
+
+      - name: INSTANCE_AFFECTED_PERC
+        value: ""
+
+      - name: SEQUENCE
+        value: "parallel"
+
+      - name: DEFAULT_HEALTH_CHECK
+        value: "false"
+
+      # Path of the AWS credentials file mounted from the secret
+      - name: AWS_SHARED_CREDENTIALS_FILE
+        value: "/tmp/cloud_config.yml"
+
+    labels:
+      name: rds-instance-stop
+      app.kubernetes.io/part-of: litmus
+      app.kubernetes.io/component: experiment-job
+      app.kubernetes.io/version: ci
+    secrets:
+      - name: cloud-secret
+        mountPath: /tmp/
diff --git a/faults/aws/icons/rds-instance-stop.png b/faults/aws/icons/rds-instance-stop.png
diff --git a/faults/aws/rds-instance-stop/engine.yaml b/faults/aws/rds-instance-stop/engine.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChaosEngine
+metadata:
+  namespace: default
+  name: aws-chaos
+spec:
+  chaosServiceAccount: litmus-admin
+  engineState: "active"
+  experiments:
+    - name: rds-instance-stop
+      spec:
+        components:
+          env:
+            # Chaos duration in seconds; adjust as desired
+            - name: TOTAL_CHAOS_DURATION
+              value: "30"
+
+            # Interval duration in seconds; adjust as desired
+            - name: CHAOS_INTERVAL
+              value: "30"
+
+            # Identifier(s) of the target RDS instances
+            # e.g. rds-demo-instance-1,rds-demo-instance-2
+            - name: RDS_INSTANCE_IDENTIFIER
+              value: ""
+
+            # Region name of the target instance
+            - name: REGION
+              value: ""
+
+            # Percentage of the target RDS instances to affect
+            - name: INSTANCE_AFFECTED_PERC
+              value: ""
diff --git a/faults/aws/rds-instance-stop/fault.yaml b/faults/aws/rds-instance-stop/fault.yaml
@@ -0,0 +1,104 @@
+---
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChaosExperiment
+description:
+  message: |
+    Stopping an RDS instance identified by instance identifier.
+metadata:
+  name: rds-instance-stop
+  labels:
+    name: rds-instance-stop
+    app.kubernetes.io/part-of: litmus
+    app.kubernetes.io/component: chaosexperiment
+    app.kubernetes.io/version: ci
+spec:
+  definition:
+    scope: Cluster
+    permissions:
+      # Pods: create, monitor and clean up the experiment and helper pods
+      - apiGroups: [""]
+        resources: ["pods"]
+        verbs:
+          - create
+          - delete
+          - get
+          - list
+          - patch
+          - update
+          - deletecollection
+      # Events: create and update the events of the chaosengine and chaosresult
+      - apiGroups: [""]
+        resources: ["events"]
+        verbs: ["create", "get", "list", "patch", "update"]
+      # Secrets and configmaps: read them so they can be mounted into the experiment pod (if specified)
+      - apiGroups: [""]
+        resources: ["secrets", "configmaps"]
+        verbs: ["get", "list"]
+      # Pod logs: track the logs of the runner, experiment and helper pods
+      - apiGroups: [""]
+        resources: ["pods/log"]
+        verbs: ["get", "list", "watch"]
+      # Pod exec: execute commands inside the target container
+      - apiGroups: [""]
+        resources: ["pods/exec"]
+        verbs: ["get", "list", "create"]
+      # Jobs: let the chaos-runner pod configure and monitor the experiment job
+      - apiGroups: ["batch"]
+        resources: ["jobs"]
+        verbs: ["create", "list", "get", "delete", "deletecollection"]
+      # Litmus resources: create, poll and delete the chaos resources used within a chaos workflow
+      - apiGroups: ["litmuschaos.io"]
+        resources: ["chaosengines", "chaosexperiments", "chaosresults"]
+        verbs: ["create", "list", "get", "patch", "update", "delete"]
+      # Nodes: read access for the experiment's node status checks
+      - apiGroups: [""]
+        resources: ["nodes"]
+        verbs: ["get", "list"]
+    image: "litmuschaos.docker.scarf.sh/litmuschaos/go-runner:latest"
+    imagePullPolicy: Always
+    command:
+      - /bin/bash
+    args:
+      - -c
+      - ./experiments -name rds-instance-stop
+    env:
+      # Chaos duration in sec
+      - name: TOTAL_CHAOS_DURATION
+        value: "30"
+
+      # Interval duration in sec
+      - name: CHAOS_INTERVAL
+        value: "30"
+
+      # Wait period (in sec) before and after chaos injection
+      - name: RAMP_TIME
+        value: ""
+
+      # Identifier of the target RDS instance
+      - name: RDS_INSTANCE_IDENTIFIER
+        value: ""
+
+      - name: REGION
+        value: ""
+
+      - name: INSTANCE_AFFECTED_PERC
+        value: ""
+
+      - name: SEQUENCE
+        value: "parallel"
+
+      - name: DEFAULT_HEALTH_CHECK
+        value: "false"
+
+      # Path of the AWS credentials file mounted from the secret
+      - name: AWS_SHARED_CREDENTIALS_FILE
+        value: "/tmp/cloud_config.yml"
+
+    labels:
+      name: rds-instance-stop
+      app.kubernetes.io/part-of: litmus
+      app.kubernetes.io/component: experiment-job
+      app.kubernetes.io/version: ci
+    secrets:
+      - name: cloud-secret
+        mountPath: /tmp/
diff --git a/faults/aws/rds-instance-stop/rds-instance-stop.chartserviceversion.yaml b/faults/aws/rds-instance-stop/rds-instance-stop.chartserviceversion.yaml
@@ -0,0 +1,33 @@
+---  # ChartServiceVersion for the rds-instance-stop AWS fault
+apiVersion: litmuschaos.io/v1alpha1
+kind: ChartServiceVersion
+metadata:
+  createdAt: "2024-09-06T10:28:08Z"  # quoted so loaders keep it a string, not a timestamp
+  name: rds-instance-stop
+  version: 0.1.0
+  annotations:
+    categories: AWS
+    vendor: LitmusChaos
+spec:
+  displayName: RDS Instance Stop
+  categoryDescription: |
+    This fault stops an RDS instance for a certain chaos duration.
+    - Stops an RDS instance using its instance identifier, then brings it back to the available state after the specified chaos duration.
+    - It helps to check the performance of the application when the RDS instance is stopped.
+  keywords:
+    - AWS
+  platforms:
+    - AWS
+  maintainers:
+    - name: Udit Gaurav
+      email: [email protected]
+  chaosType: infra
+  labels:
+    app.kubernetes.io/component: chartserviceversion
+    app.kubernetes.io/version: ci
+  links:
+    - name: Documentation
+      url: https://litmuschaos.github.io/litmus/experiments/categories/contents
+  icon:
+    - url: ""  # explicit empty string instead of a bare (null) value
+      mediatype: ""