diff --git a/cmoa_install.yaml b/cmoa_install.yaml index 20c9876..81024b0 100755 --- a/cmoa_install.yaml +++ b/cmoa_install.yaml @@ -8,5 +8,6 @@ REGISTRY: 10.10.31.243:5000/cmoa3 REPO: "nexus" # dockerhub or nexus roles: - - role: cmoa_install - delegate_to: 127.0.0.1 + - role: agent_os_setting +# - role: cmoa_install +# delegate_to: 127.0.0.1 diff --git a/cmoa_pgpatch.yaml b/cmoa_pgpatch.yaml new file mode 100755 index 0000000..efa4ce4 --- /dev/null +++ b/cmoa_pgpatch.yaml @@ -0,0 +1,15 @@ +--- +- hosts: cluster + become: true + gather_facts: true + environment: + KUBECONFIG: /root/.kube/ansible_config + vars: + cmoa_namespace: imxc + pg_version: + - 3.5.2 + - 3.5.3 + - 3.5.4 + roles: + - role: cmoa_pgpatch + delegate_to: 127.0.0.1 diff --git a/inventory b/inventory index b6ba6b7..921af17 100755 --- a/inventory +++ b/inventory @@ -1,16 +1,12 @@ [master] -10.10.43.206 +10.10.43.210 [worker1] -10.10.43.207 [worker2] -10.10.43.208 [cluster:children] master -worker1 -worker2 [master:vars] diff --git a/roles/cmoa_install/tasks/.03-ddl-dml.yml.swp b/roles/cmoa_install/tasks/.03-ddl-dml.yml.swp new file mode 100644 index 0000000..abcc7e4 Binary files /dev/null and b/roles/cmoa_install/tasks/.03-ddl-dml.yml.swp differ diff --git a/roles/cmoa_pgpatch/defaults/main.yml b/roles/cmoa_pgpatch/defaults/main.yml new file mode 100644 index 0000000..337a57a --- /dev/null +++ b/roles/cmoa_pgpatch/defaults/main.yml @@ -0,0 +1,7 @@ +--- +cmoa_namespace: imxc +pg_version: + - 3.5.1 + - 3.5.2 + - 3.5.3 + - 3.5.4 diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.2.0.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.2.0.psql new file mode 100644 index 0000000..7ed34ad --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.2.0.psql @@ -0,0 +1,803 @@ +UPDATE public.metric_meta2 SET expr='sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} 
+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / (((container_spec_memory_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)) > 0) * 100) or sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024 / 1024 / 1024 *100)' WHERE id = 'container_memory_usage_by_workload'; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: List +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + selector: + matchLabels: + app: cloudmoa-trace-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-trace-agent + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/trace-agent:$IMAGE_TAG + name: cloudmoa-trace-agent + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT +- apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + 
protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + selector: + app: cloudmoa-trace-agent + type: ClusterIP' WHERE id = 7; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: ''true'' + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + clusterIP: None + ports: + - name: scrape + port: 9110 + protocol: TCP + selector: + app: cloudmoa-node-exporter + type: ClusterIP +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + selector: + matchLabels: + app: cloudmoa-node-exporter + template: + metadata: + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/node-exporter + name: cloudmoa-node-exporter + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + hostNetwork: true + hostPID: true + securityContext: + runAsNonRoot: true + runAsUser: 65534 + volumes: + - hostPath: 
+ path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root +' WHERE id = 4; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudmoa-cluster-role +rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats 
+ verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE +subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE +roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE +spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent +spec: + selector: + matchLabels: + app: cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + imagePullPolicy: Always + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: 
ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: LOG_LEVEL + value: "INFO" + + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home' WHERE id = 2; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: 
''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - 
--config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +' WHERE id = 6; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: 
(kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: 
(node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: 
(container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +' WHERE id = 3; \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.0.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.0.psql new file mode 100644 index 0000000..6b63e62 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.0.psql @@ -0,0 +1,919 @@ + +-- from diff + +CREATE DATABASE CONFIGS; +CREATE DATABASE keycloak; + +-- cortex alert +create table public.alert_rule_config_info ( + 
config_id varchar not null, + config_data text not null, + in_use boolean default true not null, + created_date timestamp, + modified_date timestamp +); +create table alert_config_info +( + config_id varchar not null, + config_data text not null, + config_default text not null, + in_use boolean default true not null, + created_date timestamp, + modified_date timestamp +); +create table alert_config +( + id bigint not null, + cluster_id varchar, + resolve_timeout varchar, + receiver varchar, + group_by varchar, + group_wait varchar, + group_interval varchar, + repeat_interval varchar, + routes_level varchar, + routes_continue varchar, + receiver_name varchar, + webhook_url varchar, + send_resolved varchar, + inner_route boolean, + inner_webhook boolean, + in_use boolean default true not null, + created_date timestamp, + modified_date timestamp +); +ALTER TABLE public.alert_rule_config_info ADD CONSTRAINT alert_rule_config_info_config_id_pk PRIMARY KEY (config_id); +ALTER TABLE public.alert_config_info ADD CONSTRAINT alert_config_info_config_id_pk PRIMARY KEY (config_id); +ALTER TABLE public.alert_config ADD CONSTRAINT alert_config_id_pk PRIMARY KEY (id); + + + +alter table tenant_info + add delete_scheduler_date timestamp; + +alter table tenant_info + add tenant_init_clusters varchar(255); + +alter table cloud_user + add dormancy_date timestamp; + +alter table cloud_user + add status varchar(255) default 'use'::character varying not null; + +-- DELETE +-- FROM public.auth_resource3 +-- WHERE name = 'menu|Health Check|Check Script'; + +-- DELETE +-- FROM public.auth_resource3 +-- WHERE name = 'menu|Health Check'; + +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('menu|Services|Active Transaction', false, null); + +UPDATE public.menu_meta +SET position = 10::integer +WHERE id = 80::bigint; + +UPDATE public.menu_meta +SET position = 99::integer +WHERE id = 90::bigint; + + + +INSERT INTO public.menu_meta (id, description, icon, "position", url, 
auth_resource3_id, scope_level) VALUES (26, 'Active Transaction', NULL, 5, 'overviewServiceJSPD', (select id from auth_resource3 where name='menu|Services|Active Transaction'), 2); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('config', now(), null, 'global:${GLOBAL}\nroute:${ROUTE}\nreceivers:${RECEIVERS}', 'global:${GLOBAL}\nroute:${ROUTE}\nreceivers:${RECEIVERS}', true); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('global', now(), null, '\n resolve_timeout: ${RESOLVE_TIMEOUT}', '\n resolve_timeout: 5m', true); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('receivers', now(), null, '\n- name: ''${NAME}''\n webhook_configs:${WEBHOOK_CONFIGS}', '\n- name: ''cdms''\n webhook_configs:${WEBHOOK_CONFIGS}', true); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('route', now(), null, '\n receiver: ''${RECEIVER}''\n group_by: [${GROUP_BY}]\n group_wait: ${GROUP_WAIT}\n group_interval: ${GROUP_INTERVAL}\n repeat_interval: ${REPEAT_INTERVAL}\n routes:${ROUTES}', '\n receiver: ''cdms''\n group_by: [xm_clst_id, level]\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 10m\n routes:${ROUTES}', true); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('webhook_configs', now(), null, '\n - url: ''${WEBHOOK_URL}''\n send_resolved: ${SEND_RESOLVED}', '\n - url: ''${WEBHOOK_URL}''\n send_resolved: false', true); +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('routes', now(), null, '\n - receiver: ''${ROUTES_RECEIVER}''\n group_by: [${ROUTES_GROUP_BY}]\n group_wait: ${ROUTES_GROUP_WAIT}\n group_interval: 
${ROUTES_GROUP_INTERVAL}\n repeat_interval: ${ROUTES_REPEAT_INTERVAL}\n match_re:\n level: ${LEVEL}\n continue: ${CONTINUE}', '\n - receiver: ''cdms''\n group_by: [xm_clst_id, level]\n group_wait: 5s\n group_interval: 5s\n repeat_interval: 1m\n match_re:\n level: Critical\n continue: true', true); +insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('config', now(), null, 'groups:${GROUPS}', true); +insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('groups', now(), null, '\n- name: "${NAME}"\n rules:${RULES}', true); +insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('isHost', now(), null, '\n instance: "{{ $labels.instance }}"\n is_host: "true"', true); +insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('rules', now(), null, '\n - alert: "${ALERT}"\n expr: "${EXPR}"\n labels:\n level: "${LEVEL}"\n for: "${FOR}"\n annotations:\n xm_service_name: "{{ $labels.xm_service_name }}"\n level: "${LEVEL}"\n meta_id: "${META_ID}"\n xm_node_id: "{{ $labels.xm_node_id }}"\n threshold: ${THRESHOLD}\n xm_container_id: "{{ $labels.xm_cont_name }}"\n message: "${MESSAGE}"\n rule_id: ${RULE_ID}\n xm_pod_id: "{{ $labels.xm_pod_id }}"\n xm_clst_id: "{{ $labels.xm_clst_id }}"\n xm_namespace: "{{ $labels.xm_namespace }}"\n value: "{{ $value }}"\n xm_entity_type: "{{ $labels.xm_entity_type }}"', true); + + + +-- JSPD 옵션 값 테이블 +CREATE TABLE public.jspd_prop ( + code_id character varying(255) NOT NULL, + default_value character varying(255) NOT NULL, + description text, + code_type character varying(255), + input_type character varying(255), + input_props character varying(255), + use_yn boolean NOT NULL, + created_date timestamp without time zone NOT NULL, + modified_date timestamp without time zone NOT NULL +); + +ALTER TABLE ONLY 
public.jspd_prop ADD CONSTRAINT jspd_prop_pkey PRIMARY KEY (code_id); + +-- JSPD 옵션 값 설정 LIST table +CREATE TABLE public.jspd_config ( + cluster_id character varying(255) NOT NULL, + namespace character varying(255) NOT NULL, + service character varying(255) NOT NULL, + code_id character varying(255), + code_value character varying(255), + code_type character varying(255), + created_date timestamp without time zone NOT NULL, + modified_date timestamp without time zone NOT NULL +); +-- ALTER TABLE public.jspd_prop +-- ADD input_type character varying(255); + +-- ALTER TABLE public.jspd_prop +-- ADD input_props character varying(255); + + +ALTER TABLE public.jspd_config + ADD CONSTRAINT jspd_config_pkey PRIMARY KEY (cluster_id, namespace, service, code_id); + +ALTER TABLE ONLY public.jspd_config + ADD CONSTRAINT jspd_config_code_id_fk FOREIGN KEY (code_id) REFERENCES public.jspd_prop(code_id); + +INSERT INTO jspd_prop values('TRX_NAME_TYPE','0', 'Set the transaction name generation method (0:default, 1:parameter, 2:param_nouri, 3:attribute)', 'integer','select','{"default":"0", "parameter":"1", "param_nouri":"2", "attribute":"3"}',true, now(), now()); +INSERT INTO jspd_prop values('TRX_NAME_KEY','', 'Set the transaction name generation method by TRX_NAME_TYPE (parameter(1), param_nouri(2),attribute(3))','string','input','',true, now(), now()); +INSERT INTO jspd_prop values('CURR_TRACE_TXN','*:3000', 'Option to check TXNNAME with startsWith logic and collect calltree based on elapsetime. 
+ blank or set to *:0 when collecting all.', 'string','input','', true, now(), now()); +INSERT INTO jspd_prop values('CURR_TRACE_LEVEL','100', 'call tree detection level', 'integer','range','{"gte":"0", "lte":"100"}',true, now(), now()); +INSERT INTO jspd_prop values('TRACE_JDBC','true', 'include call tree data', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('EXCLUDE_SERVICE','gif,js,css,xml', 'exclude service name', 'string','input','',true, now(), now()); +INSERT INTO jspd_prop values('INCLUDE_EXCEPTION','', 'Set the exception to be treated as an exception transaction.(type.Exception)', 'string','input','',true, now(), now()); +INSERT INTO jspd_prop values('EXCLUDE_EXCEPTION','', 'Exception that you do not want to be treated as an exception transaction is set.(type.Exception)', 'string','input','',true, now(), now()); +INSERT INTO jspd_prop values('RESP_HEADER_TID','false', 'include X-Xm-Tid text for gearing imxwsmj', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('USE_RUNTIME_REDEFINE','false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('USE_RUNTIME_REDEFINE_HTTP_REMOTE','false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('RT_RMI','false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean','input','',true, now(), now()); + +INSERT INTO jspd_prop values('RT_RMI_TYPE','3', 'remote key value(1: pkey, 2: ckey, 3: pckey)', 'integer','select','{"pkey":"1", "ckey":"2", "pckey":"3"}',true, now(), now()); +INSERT INTO jspd_prop values('RT_RMI_ELAPSE_TIME','0', 'Collect transactions that are greater than or equal to the option value', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('RT_FILE','0x10', 'Display file input/output in call tree', 'string','input','',true, now(), now()); +INSERT INTO jspd_prop 
values('RT_SOCKET','0x10', 'Display socket input/output in call tree', 'string','input','',true, now(), now()); + +INSERT INTO jspd_prop values('MTD_LIMIT','100000', 'Limit the number of calltree', 'integer','range','{"gte":"0"}',true, now(), now()); + +INSERT INTO jspd_prop values('LIMIT_SQL','20', 'Collection limits based on SQL sentence length', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('TXN_COUNT_LIMIT','3000', 'Transactions per second', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('USE_SQL_ELLIPSIS','false', 'Collect length of sql string by half of SQL_TEXT_BUFFER_SIZE', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('TXN_SQL_LIMIT_COUNT','2000', 'SQL collection limit', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('TXN_CPU_TIME','false', 'cpu time metric used in transactions option', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('TXN_MEMORY','false', 'memory alloc size metric used in transactions option', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('ENABLE_WEB_ID_WHEN_NO_USERAGENT','false', 'Do not create an web ID unless requested by the browser', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('USE_SQL_SEQ','false', 'Add sequence number to sql and packet', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('TRACE_FETCH_METHOD','false', 'Display the fetch function of ResultSet in the call tree', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('EXCLUDE_THREAD','', 'Ability to block monitoring of a specific thread name, value = String[] (prefix1,prefix2)', 'string','input','',true, now(), now()); +INSERT INTO jspd_prop values('USE_METHOD_SEQ','false', 'Display the calltree in the form of a time series without summary', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('TRACE_METHOD_MEMORY','false', 'Collects 
allocation memory for each method of calltree. (unit k)', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('TRACE_METHOD_CPUTIME','false', 'Collects cputime for each method of calltree. (unit ms)', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('DISABLE_ROOT_METHOD','false', 'Express the service root method at the top of the call tree', 'boolean','input','',true, now(), now()); +INSERT INTO jspd_prop values('MTD_BUFFER_SIZE','2500', 'size of the internal buffer that stores the call tree method data.', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('MTD_STACK_BUFFER_SIZE','100', 'A separate option to additionally collect methods that did not generate an error among methods that were not collected because the MTD_BUFFER_SIZE option value was exceeded.', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('MTD_EXCEPTION_BUFFER_SIZE','100', 'A separate option to additionally collect methods that have an error among methods that could not be collected because the MTD_BUFFER_SIZE option value was exceeded.', 'integer','input','',true, now(), now()); +INSERT INTO jspd_prop values('DEBUG','0x000000000', 'Option to specify log level (Debugging)', 'string','input','',true, now(), now()); + +INSERT INTO jspd_prop values('EXCEPTION_LIMIT', '-1', 'Exception content length limit', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('TXN_SEND_PERIOD', '1000', 'Txninfo transmission cycle (ms)', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('MTD_SEND_PERIOD', '1000', 'Txnmethod transmission cycle (ms)', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('SQL_SEND_PERIOD', '1000', 'Txnsql transmission cycle (ms)', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('ETOE_SEND_PERIOD', '1000', 'E2einfo transmission cycle (ms)', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop 
values('TXN_SEND_LIMIT', '15000', 'Txninfo maximum number of transfers', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('MTD_SEND_LIMIT', '15000', 'Txnmethod maximum number of transfers', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('SQL_SEND_LIMIT', '15000', 'Txnsql maximum number of transfers', 'integer', 'input', '', true, now(), now()); +INSERT INTO jspd_prop values('ETOE_SEND_LIMIT', '15000', 'E2einfo maximum number of transfers', 'integer', 'input', '', true, now(), now()); + + +---public.metric_meta2 +UPDATE public.metric_meta2 SET expr = '((node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - (node_memory_MemFree_bytes{xm_entity_type="Node", {filter}} + node_memory_Cached_bytes{xm_entity_type="Node", {filter}} + node_memory_Buffers_bytes{xm_entity_type="Node", {filter}} + node_memory_SReclaimable_bytes{xm_entity_type="Node", {filter}})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - node_memory_MemFree_bytes{xm_entity_type="Node", {filter}}) / 1024 / 1024 / 1024'::text WHERE id LIKE 'node#_memory#_used' ESCAPE '#'; + +UPDATE public.metric_meta2 SET expr = '((node_memory_MemTotal_bytes{{filter}} - (node_memory_MemFree_bytes{{filter}} + node_memory_Cached_bytes{{filter}} + node_memory_Buffers_bytes{{filter}} + node_memory_SReclaimable_bytes{{filter}})) >= 0 or (node_memory_MemTotal_bytes{{filter}} - node_memory_MemFree_bytes{{filter}})) / node_memory_MemTotal_bytes{{filter}} * 100'::text WHERE id LIKE 'host#_memory#_usage' ESCAPE '#'; + +UPDATE public.metric_meta2 SET expr = 'sum by(instance, mountpoint, fstype, data_type) ( +label_replace(node_filesystem_size_bytes {fstype!="rootfs",{filter}}, "data_type", "totalsize", "", "") or +label_replace(node_filesystem_avail_bytes {fstype!="rootfs",{filter}}, "data_type", "availablesize", "", ""))'::text WHERE id LIKE 'host#_fs#_total#_by#_mountpoint' ESCAPE '#'; + +UPDATE public.metric_meta2 SET expr = '(1- avg by (xm_clst_id) 
(((node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Cached_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Buffers_bytes{xm_entity_type=''Node'', {filter}}) <= node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} or node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}}) / node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}})) * 100'::text WHERE id LIKE 'cluster#_memory#_usage' ESCAPE '#'; + + +UPDATE public.metric_meta2 SET expr = '((node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} - (node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Cached_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Buffers_bytes{xm_entity_type=''Node'', {filter}} + node_memory_SReclaimable_bytes{xm_entity_type=''Node'', {filter}})) >= 0 or (node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} - node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}})) / node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} * 100'::text WHERE id LIKE 'node#_memory#_usage' ESCAPE '#'; + +UPDATE public.metric_meta2 SET expr = '(node_memory_MemTotal_bytes{{filter}} - (node_memory_MemFree_bytes{{filter}} + node_memory_Cached_bytes{{filter}} + node_memory_Buffers_bytes{{filter}} + node_memory_SReclaimable_bytes{{filter}})) >= 0 or (node_memory_MemTotal_bytes{{filter}} - node_memory_MemFree_bytes{{filter}})'::text WHERE id LIKE 'host#_memory#_used' ESCAPE '#'; + + +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_pod_txn_error_rate', 'Service Pod Transaction Error Rate', 'The number of transaction error rate for pod', 'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum 
by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) / sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count {{filter}} [1m]))', 'Request', 'Service', NULL, 't', 'f', 'SVC:{{$labels.xm_service_name}} Svc Pod Transaction Error rate:{{humanize $value}}|{threshold}.', '2022-02-15 18:08:58.18', '2022-02-15 18:08:58.18'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_txn_error_rate', 'Service Transaction Error Rate', 'Service Transaction Error Rate', 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) / sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count {{filter}} [1m]))', 'Request', 'Service', NULL, 't', 'f', 'SVC:{{$labels.xm_service_name}} Error Request Rate:{{humanize $value}}%|{threshold}%.', '2022-02-15 14:33:00.118', '2022-02-15 15:40:17.64'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_txn_elapsed_time_avg', 'Service Transaction Elapsed Time (avg)', 'Service Average Elapsed Time', 'sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_total_count{{filter}}[1m])))== 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_laytency{{filter}}[1m])))/ sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_total_count{{filter}}[1m])))', 'Request', 'Service', NULL, 't', 't', 'SVC:{{$labels.xm_service_name}} Transaction Requests Time Avg:{{humanize $value}}ms|{threshold}ms.', 
'2021-11-15 16:09:34.233', '2021-11-15 16:12:21.335'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_pod_txn_elapsed_time_avg', 'Service Pod Transaction Elapsed Time (avg)', 'The number of transaction counts per second for pod', 'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_total_count{{filter}}[1m]))==0 or sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_laytency{{filter}}[1m])) / sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_total_count{{filter}}[1m]))', 'Request', 'Service', NULL, 't', 'f', 'SVC:{{$labels.xm_service_name}} Pod Transaction Requests Time Avg:{{humanize $value}}ms|{threshold}ms.', '2022-02-15 18:04:55.228', '2022-02-15 18:04:55.228'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_txn_error_count', 'Service Transaction Error Count', 'Service Transaction Error Count', 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) ', 'Request', 'Service', NULL, 't', 't', 'SVC:{{$labels.xm_service_name}} Error Request count:{{humanize $value}}%|{threshold}%.', '2021-11-15 16:10:31.352', '2021-11-15 16:12:21.335'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_txn_per_sec', 'Service Transaction Count (per Second)', 'Service Transaction Count (per Second)', 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m]))', 'Request', 'Service', NULL, 't', 't', 
'SVC:{{$labels.xm_service_name}} Svc Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2021-11-15 16:11:19.606', '2021-11-15 16:12:21.335'); +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES +('imxc_jspd_pod_txn_per_sec', 'Service Pod Transaction Count (per sec)', 'The number of transaction counts per second for pod', 'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m]))', 'Request', 'Service', NULL, 't', 'f', 'SVC:{{$labels.xm_service_name}} Svc Pod Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2022-02-15 17:59:39.45', '2022-02-15 17:59:39.45'); + + + +-- Auto-generated SQL script #202202221030 +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) * 100' + WHERE id='container_cpu_system_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)' + WHERE id='container_cpu_system_core_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) 
group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)' + WHERE id='container_cpu_usage_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)' + WHERE id='container_cpu_usage_core_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) * 100' + WHERE id='container_cpu_user_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)' + WHERE id='container_cpu_user_core_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_fs_limit_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) / 1073741824' + WHERE 
id='container_fs_limit_bytes_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_fs_reads_bytes_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024' + WHERE id='container_fs_reads_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_fs_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824' + WHERE id='container_fs_usage_bytes_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0)/ (((container_fs_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) * 100) > 0) or (container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1000)' + WHERE 
id='container_fs_usage_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_fs_writes_bytes_total{xm_cont_name!="POD"}[1m]) + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024' + WHERE id='container_fs_writes_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_cache{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824' + WHERE id='container_memory_cache_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_max_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824' + WHERE id='container_memory_max_usage_bytes_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_swap{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824' + WHERE id='container_memory_swap_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) 
(container_memory_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024' + WHERE id='container_memory_usage_bytes_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / (((container_spec_memory_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0)) > 0) * 100) or sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024 *100)' + WHERE id='container_memory_usage_by_workload'; +UPDATE public.metric_meta2 + SET expr='sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_working_set_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024' + WHERE 
id='container_memory_working_set_bytes_by_workload'; + +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES('imxc_jspd_active_txn_per_sec', 'Service Active Transaction Count (per Second)', 'Service Active Transaction Count (per Second)', 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_active_count {{filter}}[1m]))', 'Request', 'Service', NULL, true, false, 'SVC:{{$labels.xm_service_name}} Svc Active Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2022-03-11 15:51:45.946', '2022-03-11 15:51:45.946') ON +CONFLICT (id) DO +UPDATE +SET + expr = 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_active_count {{filter}}[1m]))' +WHERE /* table-qualified: a bare id is ambiguous with excluded.id inside DO UPDATE */ metric_meta2.id = 'imxc_jspd_active_txn_per_sec'; + +INSERT INTO public.metric_meta2 (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date) VALUES('imxc_jspd_pod_active_txn_per_sec', 'Service Pod Active Transaction Count (per sec)', 'The number of active transaction counts per second for pod', 'sum by(xm_clst_id, xm_namespace, xm_service_name, xm_pod_id) (rate(imxc_txn_active_count{{filter}}[1m]))', 'Request', 'Service', NULL, true, false, 'SVC:{{$labels.xm_service_name}} Svc Pod Active Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2022-03-11 15:53:29.252', '2022-03-11 15:53:29.252') ON +CONFLICT (id) DO +UPDATE +SET + expr = 'sum by(xm_clst_id, xm_namespace, xm_service_name, xm_pod_id) (rate(imxc_txn_active_count{{filter}}[1m]))' +WHERE /* table-qualified: a bare id is ambiguous with excluded.id inside DO UPDATE */ metric_meta2.id = 'imxc_jspd_pod_active_txn_per_sec'; + + +--public.agent_install_file_info + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudmoa-cluster-role +rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes 
+ verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE +subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE +roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + 
apiGroup: rbac.authorization.k8s.io +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE +spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent +spec: + selector: + matchLabels: + app: cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + imagePullPolicy: Always + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG_LEVEL + value: "INFO" + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume 
+ hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home'::text WHERE id = 2::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + 
action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SReclaimable_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node 
+ metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: 
$CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 3::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: List +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + selector: + matchLabels: + app: cloudmoa-trace-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-trace-agent + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/trace-agent:$IMAGE_TAG + name: cloudmoa-trace-agent + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT +- apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + selector: + app: cloudmoa-trace-agent + type: ClusterIP'::text WHERE id = 7::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 
''true'' + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + clusterIP: None + ports: + - name: scrape + port: 9110 + protocol: TCP + selector: + app: cloudmoa-node-exporter + type: ClusterIP +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + selector: + matchLabels: + app: cloudmoa-node-exporter + template: + metadata: + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/prom/node-exporter + name: cloudmoa-node-exporter + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + hostNetwork: true + hostPID: true + securityContext: + runAsNonRoot: true + runAsUser: 65534 + volumes: + - hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root +'::text WHERE id = 4::bigint; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.2.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.2.psql new file mode 100644 index 0000000..e84e9be --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.3.2.psql @@ -0,0 +1,459 @@ + UPDATE public.agent_install_file_info SET 
yaml = '--- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: cloudmoa-cluster-role + rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: 
ClusterRoleBinding + metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE + subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE + roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io + --- + apiVersion: policy/v1beta1 + kind: PodSecurityPolicy + metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE + spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim + --- + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent + spec: + selector: + matchLabels: + app: cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + imagePullPolicy: Always + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_ID + valueFrom: 
+ fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG_LEVEL + value: "INFO" + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home' WHERE id = 2; + +UPDATE public.agent_install_file_info SET yaml = '--- + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE + data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + 
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: 
''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent + spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - 
--config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config + ' WHERE id = 6; \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.1.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.1.psql new file mode 100644 index 0000000..0d20f2c --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.1.psql @@ -0,0 +1,1379 @@ +CREATE TABLE public.cloud_user_setting ( + user_id character varying(255) NOT NULL, + lang character varying(20) DEFAULT 'en', + theme character varying(20) DEFAULT 'dark', + access_token integer DEFAULT 30, + refresh_token integer DEFAULT 10080, + error_msg boolean DEFAULT false, + alert_sound boolean DEFAULT false, + session_persistence boolean DEFAULT true, + gpu_acc_topology boolean DEFAULT true, + created_date timestamp without time zone, + modified_date timestamp without time zone +); + +ALTER TABLE public.cloud_user_setting OWNER TO admin; + +ALTER TABLE ONLY public.cloud_user_setting ADD CONSTRAINT cloud_user_setting_pkey PRIMARY KEY (user_id); + +INSERT INTO public.cloud_user_setting +(user_id, lang, theme, access_token, refresh_token, error_msg, alert_sound, session_persistence, gpu_acc_topology, created_date, modified_date) +VALUES('admin', null, null, null, null, false, false, true, true, now(), null); + +INSERT INTO public.cloud_user_setting +(user_id, lang, theme, access_token, refresh_token, error_msg, alert_sound, session_persistence, gpu_acc_topology, 
created_date, modified_date) +VALUES('owner', null, null, null, null, false, false, true, true, now(), null); + +-- Add all entries that were missing in Douzone (3.3.2) +INSERT INTO public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) VALUES ('normal_score', '20', null, null, 'anomaly', '2020-07-07 18:15:55.000000', '2020-07-07 18:15:53.000000'); +INSERT INTO public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) VALUES ('attention_score', '60', null, null, 'anomaly', '2020-07-07 09:18:04.968765', '2020-07-07 09:18:04.968765'); +INSERT INTO public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) VALUES ('warning_score', '90', null, null, 'anomaly', '2020-07-07 09:18:17.091678', '2020-07-07 09:18:17.091678'); +INSERT INTO public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) VALUES ('collection_weeks', '5', null, null, 'anomaly', '2020-07-13 03:52:44.445408', '2020-07-13 03:52:44.445408'); + +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('topology_storage_period', 7, 'retention period setting value for topology information', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('trace_storage_period', 3, 'retention period setting value for trace data', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('event_storage_period', 7, 'retention period setting value for event data', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values 
('metric_storage_period', 7, 'retention period setting value for metric data', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('sparse_storage_period', 90, 'retention period setting value for sparse log', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('anomaly_storage_period', 7, 'retention period setting value for anomaly score', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('alert_storage_period', 7, 'retention period setting value for alert data', null, 'storage', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('audit_storage_period', 7, 'retention period setting value for audit data', null, 'storage', now(), null); + +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('topology_idx', 'kubernetes_cluster_info:kubernetes_cluster_history:kubernetes_cronjob_info:kubernetes_info:kubernetes_job_info:kubernetes_network_connectivity:kubernetes_pod_info:kubernetes_pod_history', 'elastic search topology type data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('trace_idx', 'spaninfo:sta_httpapi:sta_httpsummary:sta_podinfo:sta_relation:sta_tracetrend:sta_externalrelation:sta_traceinfo:jspd_ilm', 'elastic search trace type data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, 
code_group, created_date, modified_date) +values ('event_idx', 'kubernetes_event_info', 'elastic search for event data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('sparse_idx', 'sparse_model:sparse_log', 'elastic search sparse data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('anomaly_idx', 'entity_score:metric_score:timeline_score', 'elastic search amomaly data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('alert_idx', 'alert_event_history', 'elastic search alert data index', null, 'storageidx', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('audit_idx', 'kubernetes_audit_log', 'elastic search audit type data index', null, 'storageidx', now(), null); + +-- insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) values ('ratelimiting', 2.0, '{"type" : "int", "operator" : "range", "minVal" : "1", "maxVal" : "3000", "desc" : "The time-based sampling method allows input as an integer (e.g. 1 monitors only 1 trace per second)" }', null, 'tracesampling', '2020-07-30 13:54:52', null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('probabilistic', 0.1, '{"type" : "float", "operator" : "range", "minVal" : "0", "maxVal" : "1.0", "desc" : "Probability-based sampling method allows input between 0 and 1 (e.g. 
0.1 monitors only 10% of trace information)" }', null, 'tracesampling', '2020-07-30 13:54:52', null); + +INSERT INTO common_setting values('alert_expression','==,<=,<,>=,>', 'alert expression for user custom', null,'alert', now(), now()); + +INSERT INTO common_setting values('job_duration_range','86400', 'job duration range for average', null,'job', now(), now()); + +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Topology Agent', 'topology-agent', 'topology agent deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Metric Agent', 'metric-agent', 'metric agent deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Trace Agent', 'trace-agent', 'trace agent deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Datagate', 'datagate', 'datagate deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Jspd Collector', 'jspd-lite-collector', 'jspd collector deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Metric Collector', 'metric-collector', 'metric collector deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Cloudmoa Collector', 'imxc-collector', 'cloudmoa collector deployment name', null, 'modules', now(), null); +insert into public.common_setting 
(code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Authentication Server', 'auth-server', 'authentication server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Notification Server', 'noti-server', 'notification server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Eureka Server', 'eureka', 'eureka server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Zuul Server', 'zuul-deployment', 'zuul server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Api Server', 'imxc-api-demo', 'api server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Ui Server', 'imxc-ui-demo', 'ui server deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Metric Analyzer Master', 'metric-analyzer-master', 'metric analyzer master deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Metric Analyzer Worker', 'metric-analyzer-worker', 'metric analyzer worker deployment name', null, 'modules', now(), null); +insert into public.common_setting (code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +values ('Kafka Stream Txntrend', 
'kafka-stream-txntrend-deployment', 'kafka stream txntrend deployment name', null, 'modules', now(), null); + +INSERT INTO public.common_setting +(code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +VALUES('error_msg', 'false', 'Error Message default value', '', 'user_setting', now(), null); + +INSERT INTO public.common_setting +(code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +VALUES('alert_sound', 'false', 'Alert Sound default value', '', 'user_setting', now(), null); + +INSERT INTO public.common_setting +(code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +VALUES('session_persistence', 'true', 'Session Persistence default value', '', 'user_setting', now(), null); + +INSERT INTO public.common_setting +(code_id, code_value, code_desc, code_auth, code_group, created_date, modified_date) +VALUES('gpu_acc_topology', 'true', 'GPU Accelerated Topology default value', '', 'user_setting', now(), null); + +UPDATE public.agent_install_file_info +SET yaml = '--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudmoa-cluster-role +rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - 
cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE +subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE +roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE +spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent +spec: + selector: + 
matchLabels: + app: cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG_LEVEL + value: "INFO" + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home'::text +WHERE id = 2::bigint; + +UPDATE public.common_setting +SET code_group='storageidx' +WHERE code_id='topology_idx'; + +UPDATE public.common_setting +SET code_value='spaninfo:sta_httpapi:sta_httpsummary:sta_podinfo:sta_relation:sta_tracetrend:sta_externalrelation:sta_traceinfo:jspd_ilm', + code_group='storageidx' +WHERE code_id='trace_idx'; + +UPDATE public.common_setting +SET code_group='storageidx' +WHERE code_id='event_idx'; + +UPDATE public.common_setting +SET code_group='storageidx' +WHERE 
code_id='sparse_idx'; + +UPDATE public.common_setting +SET code_group='storageidx' +WHERE code_id='anomaly_idx'; + +UPDATE public.common_setting +SET code_value='alert_event_history', + code_group='storageidx' +WHERE code_id='alert_idx'; + +UPDATE public.common_setting +SET code_group='storageidx' +WHERE code_id='audit_idx'; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: ''true'' + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + clusterIP: None + ports: + - name: scrape + port: 9110 + protocol: TCP + selector: + app: cloudmoa-node-exporter + type: ClusterIP +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + selector: + matchLabels: + app: cloudmoa-node-exporter + template: + metadata: + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/node-exporter + name: cloudmoa-node-exporter + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + hostNetwork: true + hostPID: true + securityContext: + runAsNonRoot: true + runAsUser: 65534 + volumes: + - 
hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root +'::text WHERE id = 4::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: system:cloudmoa-aggregated-metrics-reader + labels: + rbac.authorization.k8s.io/aggregate-to-view: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" +rules: + - apiGroups: ["metrics.k8s.io"] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-metrics-server:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: + - kind: ServiceAccount + name: cloudmoa-metrics-server + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cloudmoa-metrics-server-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: + - kind: ServiceAccount + name: cloudmoa-metrics-server + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:cloudmoa-metrics-server +rules: + - apiGroups: + - "" + resources: + - pods + - nodes + - nodes/stats + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:cloudmoa-metrics-server +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:cloudmoa-metrics-server +subjects: + - kind: ServiceAccount + name: cloudmoa-metrics-server + namespace: kube-system +--- +apiVersion: v1 +kind: Service +metadata: + name: cloudmoa-metrics-server + namespace: kube-system + labels: + kubernetes.io/name: "Metrics-server" +spec: + selector: + 
k8s-app: cloudmoa-metrics-server + ports: + - port: 443 + protocol: TCP + targetPort: 443 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cloudmoa-metrics-server + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metrics-server + namespace: kube-system + labels: + k8s-app: cloudmoa-metrics-server +spec: + selector: + matchLabels: + k8s-app: cloudmoa-metrics-server + template: + metadata: + name: cloudmoa-metrics-server + labels: + k8s-app: cloudmoa-metrics-server + spec: + serviceAccountName: cloudmoa-metrics-server + volumes: + # mount in tmp so we can safely use from-scratch images and/or read-only containers + - name: tmp-dir + emptyDir: {} + containers: + - name: cloudmoa-metrics-server + image: $DOCKER_REGISTRY_URL/metrics-server-amd64 + command: + - /metrics-server + - --logtostderr + - --v=4 + - --kubelet-insecure-tls=true + - --kubelet-preferred-address-types=InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP + volumeMounts: + - name: tmp-dir + mountPath: /tmp'::text WHERE id = 5::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: 
kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: 
(node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: 
(container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 3::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: List +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + selector: + matchLabels: + app: cloudmoa-trace-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-trace-agent + spec: + 
securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/trace-agent:$IMAGE_TAG + name: cloudmoa-trace-agent + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT +- apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-trace-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-trace-agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + selector: + app: cloudmoa-trace-agent + type: ClusterIP'::text WHERE id = 7::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudmoa-cluster-role +rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - update + - apiGroups: + 
- batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE +subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE +roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE +spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: 
cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent +spec: + selector: + matchLabels: + app: cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG_LEVEL + value: "INFO" + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home'::text WHERE id = 2::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions 
is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: 
xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + 
target_label: xm_cont_id + - source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 6::bigint; + +ALTER TABLE public.alert_rule_config_info ALTER COLUMN config_data TYPE text; + +update alert_rule_config_info +set config_data = '\n - alert: "${ALERT}"\n expr: "${EXPR}"\n labels:\n level: "${LEVEL}"\n for: "${FOR}"\n annotations:\n xm_service_name: "{{ 
$labels.xm_service_name }}"\n level: "${LEVEL}"\n meta_id: "${META_ID}"\n xm_node_id: "{{ $labels.xm_node_id }}"\n threshold: ${THRESHOLD}\n xm_container_id: "{{ $labels.xm_cont_name }}"\n message: "${MESSAGE}"\n rule_id: ${RULE_ID}\n xm_pod_id: "{{ $labels.xm_pod_id }}"\n xm_clst_id: "{{ $labels.xm_clst_id }}"\n xm_namespace: "{{ $labels.xm_namespace }}"\n value: "{{ $value }}"\n xm_entity_type: "{{ $labels.xm_entity_type }}"\n alert_entity_type: "${ALERT_ENTITY_TYPE}"' +where config_id = 'rules'; + +ALTER TABLE public.alert_config_info ALTER COLUMN config_data TYPE text, ALTER COLUMN config_default TYPE text; + +insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('routes', now(), null, '\n - receiver: ''${ROUTES_RECEIVER}''\n group_by: [${ROUTES_GROUP_BY}]\n group_wait: ${ROUTES_GROUP_WAIT}\n group_interval: ${ROUTES_GROUP_INTERVAL}\n repeat_interval: ${ROUTES_REPEAT_INTERVAL}\n match_re:\n level: ${LEVEL}\n continue: ${CONTINUE}', '\n - receiver: ''cdms''\n group_by: [xm_clst_id, level]\n group_wait: 5s\n group_interval: 5s\n repeat_interval: 1m\n match_re:\n level: Critical\n continue: true', true); \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.2.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.2.psql new file mode 100644 index 0000000..5c5d3c9 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.2.psql @@ -0,0 +1,8 @@ +-- admin의 owner 속성 추가 +UPDATE cloud_user SET is_tenant_owner = true WHERE user_id = 'admin'; + +-- owner에 대한 종속성을 admin으로 이관기능(필요하면 사용) +UPDATE auth_resource3 SET name = replace(name, 'owner', 'admin') WHERE name like '%|owner|%'; + +-- CLOUD-2305 node_memory_used metric_meta node_memory_SReclaimable_bytes 제거 패치문 반영 +UPDATE metric_meta2 SET expr = '((node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - (node_memory_MemFree_bytes{xm_entity_type="Node", {filter}} + 
node_memory_Cached_bytes{xm_entity_type="Node", {filter}} + node_memory_Buffers_bytes{xm_entity_type="Node", {filter}})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - node_memory_MemFree_bytes{xm_entity_type="Node", {filter}}) / 1024 / 1024 / 1024' WHERE id = 'node_memory_used'; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.3.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.3.psql new file mode 100644 index 0000000..02f01db --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.3.psql @@ -0,0 +1,361 @@ +-- agent_install_file_info +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: 
(kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: 
(node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: 
(container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 3::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when 
total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: 
[__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - 
source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 6::bigint; + +-- CLOUD-2798 pod_phase_count_by_cluster metric_meta 수정 +UPDATE metric_meta2 SET expr = 'count by(xm_clst_id, pod_state) (sum by (xm_clst_id, xm_pod_id, pod_state)(rate(imxc_kubernetes_container_resource_limit_cpu{{filter}}[1m])))' WHERE id = 'pod_phase_count_by_cluster'; + +-- node_memory_usage 
수정 +update metric_meta2 set expr = 'sum by (xm_node_id)((node_memory_MemTotal_bytes{xm_entity_type="Node"}- (node_memory_MemFree_bytes{xm_entity_type="Node"} + node_memory_Cached_bytes{xm_entity_type="Node"} + node_memory_Buffers_bytes{xm_entity_type="Node"})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node"}- node_memory_MemFree_bytes{xm_entity_type="Node"}) / (sum by (xm_node_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})) * 100' where id = 'node_memory_usage'; \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.6.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.6.psql new file mode 100644 index 0000000..7c582c5 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.6.psql @@ -0,0 +1,360 @@ +-- CLOUD-3473 Memory capacity 조회 쿼리 수정 +update metric_meta2 set description = 'imxc_kubernetes_node_resource_capacity_memory', +expr = 'sum by (xm_clst_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})' where id = 'cluster_memory_capacity'; + +-- module명 metricdata owner_name 와 일치하도록 변경 +update common_setting set code_value ='cmoa-collector' where code_id = 'Cloudmoa Collector'; +update common_setting set code_value ='imxc-api' where code_id = 'Api Server'; +update common_setting set code_value ='imxc-ui' where code_id = 'Ui Server'; +update common_setting set code_value ='cloudmoa-trace-agent' where code_id = 'Trace Agent'; + +-- CLOUD-4795 Containerd 환경 Container Network 수집 불가 건 확인 +-- 22.10.08 현대카드 대응 건으로 release 3.4.6에 반영 +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater 
than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + 
separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [ __name__ ] + 
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +'::text WHERE id = 6::bigint; + +UPDATE public.agent_install_file_info SET yaml = '--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is 
greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: 
xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: 
xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config'::text WHERE id = 3::bigint; + diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.7.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.7.psql new file mode 100644 index 0000000..92344db --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.7.psql @@ -0,0 +1,102 @@ +-- CLOUD-4752 node_memory_usage alert 관련 쿼리 수정 
+update metric_meta2 set +expr = 'sum by (xm_clst_id, xm_node_id)((node_memory_MemTotal_bytes{xm_entity_type="Node"}- (node_memory_MemFree_bytes{xm_entity_type="Node"} + node_memory_Cached_bytes{xm_entity_type="Node"} + node_memory_Buffers_bytes{xm_entity_type="Node"})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node"}- node_memory_MemFree_bytes{xm_entity_type="Node"}) / (sum by (xm_clst_id, xm_node_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})) * 100' +where id = 'node_memory_usage'; + +-- CLOUD-6474 node-exporter | GOMAXPROCS 세팅 +-- Auto-generated SQL script #202211241543 +UPDATE public.agent_install_file_info + SET yaml='--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: ''true'' + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + clusterIP: None + ports: + - name: scrape + port: 9110 + protocol: TCP + selector: + app: cloudmoa-node-exporter + type: ClusterIP +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE +spec: + selector: + matchLabels: + app: cloudmoa-node-exporter + template: + metadata: + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/node-exporter + name: cloudmoa-node-exporter + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + env: + - name: GOMAXPROCS + value: "1" + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + 
- mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + hostNetwork: true + hostPID: true + securityContext: + runAsNonRoot: true + runAsUser: 65534 + volumes: + - hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root +' + WHERE id=4; \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.8.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.8.psql new file mode 100644 index 0000000..ea66c68 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.4.8.psql @@ -0,0 +1,387 @@ +-- CLOUD-6526 host 관련 쿼리 수정 +-- 수집된 메트릭 시간차로 인해 데이터 표출이 안되는걸 방지하기 위해 rate 5m 추가 +UPDATE metric_meta2 SET expr = 'sum by (data_type, instance) ( +label_replace(rate(node_network_receive_bytes_total{{filter}}[1m]) or rate(node_network_receive_bytes_total{{filter}}[5m]), "data_type", "Receive", "", "") or +label_replace(rate(node_network_transmit_bytes_total{{filter}}[1m]) or rate(node_network_transmit_bytes_total{{filter}}[5m]), "data_type", "Transmit", "", "") )' +WHERE id='host_network_io_byte'; + +UPDATE public.metric_meta2 SET expr = 'sum by (data_type, instance) ( +label_replace(rate(node_disk_read_bytes_total{{filter}}[1m]) or rate(node_disk_read_bytes_total{{filter}}[5m]), "data_type", "Read", "", "") or +label_replace(rate(node_disk_written_bytes_total{{filter}}[1m]) or rate(node_disk_written_bytes_total{{filter}}[5m]), "data_type", "Write", "", "") )' +WHERE id = 'host_disk_read_write_byte'; + +UPDATE public.metric_meta2 SET expr = 'sum by (instance) ( +(rate(node_disk_reads_completed_total{{filter}}[1m]) + rate(node_disk_writes_completed_total{{filter}}[1m])) or +(rate(node_disk_reads_completed_total{{filter}}[5m]) + rate(node_disk_writes_completed_total{{filter}}[5m])))' +WHERE id = 'host_disk_iops'; + +-- CLOUD-8671 
Metric-Agent | 데이터 필터링 설정 추가 +-- Workload > Pod 화면 등에 Docker 런타임 환경의 자원 사용량이 2배 가량으로 보이던 문제 픽스 +UPDATE public.agent_install_file_info + SET yaml='--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + 
replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap 
+ regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + - source_labels: [ __name__, image ] + separator: "@" + regex: "container_cpu.*@" + action: drop + - source_labels: [ __name__, name ] + separator: "@" + regex: "container_memory.*@" + action: drop +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: 
LOG_LEVEL + value: "INFO" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +' + WHERE id=3; + +UPDATE public.agent_install_file_info + SET yaml='--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cloudmoa-metric-agent-config + namespace: $CLOUDMOA_NAMESPACE +data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + metric-agent.yml: | + global: + scrape_interval: 15s + + scrape_configs: + - job_name: ''kubernetes-kubelet'' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + 
relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + scheme: https + tls_config: + ca_file: 
/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + - source_labels: [ __name__, image ] + separator: "@" + regex: "container_cpu.*@" + action: drop + - source_labels: [ __name__, name ] + separator: "@" + regex: "container_memory.*@" + action: drop +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmoa-metric-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-metric-agent +spec: + selector: + matchLabels: + app: cloudmoa-metric-agent + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: cloudmoa-metric-agent + spec: + containers: + - name: cloudmoa-metric-agent + image: 
$DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG + args: + - --config.file=/etc/metric-agent/metric-agent.yml + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: /etc/metric-agent/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: STORAGE_TYPE + value: datagate + restartPolicy: Always + volumes: + - name: config-volume + configMap: + name: cloudmoa-metric-agent-config +' + WHERE id=6; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.0.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.0.psql new file mode 100644 index 0000000..f518024 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.0.psql @@ -0,0 +1,147 @@ +-- CLOUD-12000 Disk R/W 메트릭 변경 - Read, Write 반대로 설정 +UPDATE metric_meta2 SET expr = 'sum by(xm_clst_id, xm_pod_id, xm_cont_name, data_type) (label_replace(rate(container_fs_writes_bytes_total{xm_entity_type="Container",{filter}}[1m]), "data_type", "Write" , "", "") or label_replace(rate(container_fs_reads_bytes_total{xm_entity_type="Container",{filter}}[1m]), "data_type", "Read", "" , ""))' WHERE id ='container_disk_read_write_byte'; +UPDATE metric_meta2 SET expr = 'sum by(xm_clst_id, xm_pod_id, data_type) (label_replace(rate(container_fs_writes_bytes_total{xm_entity_type="Container",{filter}}[1m]), "data_type", "Write" , "", "") or label_replace(rate(container_fs_reads_bytes_total{xm_entity_type="Container",{filter}}[1m]), "data_type", "Read", "" , ""))' WHERE id ='pod_disk_read_write_byte'; +UPDATE menu_meta SET description = 'My List' WHERE url = 'documentDashboard'; +UPDATE menu_meta SET description = 'Shared List' WHERE url = 'templateDashboard'; +UPDATE auth_resource2 SET description = 'My List' WHERE id = 48; +UPDATE auth_resource2 SET 
description = 'Shared List' WHERE id = 49; +UPDATE auth_resource3 SET description = 'My List' WHERE id = 48; +UPDATE auth_resource3 SET description = 'Shared List' WHERE id = 49; +UPDATE auth_resource3 SET description = 'menu|Dashboards|My List' WHERE id = 110; +UPDATE auth_resource3 SET description = 'menu|Dashboards|Shared List' WHERE id = 111; + +-- Add dashboards (Postgres , Redis , Traefik , MySQL / MariaDB , Nginx-Ingress) +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|Postgres', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|Redis', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|Traefik', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|MySQL / MariaDB', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|Nginx-Ingress', false, null); + +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'), '2020-04-28 09:23:14.286', '2020-04-28 09:23:44.213', '[{"i":"widget0","widget":{"header":"default-header","body":"label-view","title":" "},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Resource Utilization"}},"x":0,"y":9,"static":true},{"i":"widget1","widget":{"header":"default-header","body":"line-chart-view","title":"CPU Usage (%)","description":"postgres에 대한 pod들의 cpu 
사용량"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_cpu_usage_by_workload","entityId":"postgres","type":"workload"}},"visualization":{"showLegend":true}},"x":0,"y":12,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"line-chart-view","title":"Memory Usage (GiB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_memory_working_set_bytes_by_workload","entityId":"postgres","type":"workload"}},"visualization":{"showLegend":true}},"x":12,"y":12,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"line-chart-view","title":"Network Recieved (KiB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_network_receive_by_workload","entityId":"postgres","type":"workload"}},"visualization":{"showLegend":true,"background":null,"fontSize":12,"textAlign":"center","padding":[0,18],"sideways":false}},"x":36,"y":12,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"line-chart-view","title":"Network Transmit (KiB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_network_transmit_by_workload","entityId":"postgres","type":"workload"}},"visualization":{"showLegend":true}},"x":24,"y":12,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"label-view","title":" 
"},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"General Statics"}},"x":0,"y":22,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"horizontal-bar-chart-view","title":"Lock mode"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"pg_count_by_lockmode","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":25,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"horizontal-bar-chart-view","title":"Connection By DB and State"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"pg_count_by_dbname_state","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":25,"static":true},{"i":"widget8","widget":{"header":"default-header","body":"stat-view","title":"Number of Client Connection"},"w":17,"h":6,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_activity_count","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":15,"y":3,"static":true},{"i":"widget9","widget":{"header":"default-header","body":"label-view","title":" 
"},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Overview"}},"x":0,"y":0,"static":true},{"i":"widget11","widget":{"header":"default-header","body":"stat-view","title":"Lock Count"},"w":16,"h":6,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"pg_locks_count","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":32,"y":3,"static":true},{"i":"widget10","widget":{"header":"default-header","body":"stat-view","title":"Up"},"w":15,"h":6,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"pg_up","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[{"color":"#32AC2D","value":"1"}]}},"x":0,"y":3,"static":true},{"i":"widget12","widget":{"header":"default-header","body":"line-chart-view","title":"Buffer Cache Hit Ratio (%)"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_cache_hit_ratio","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true,"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":0,"y":36,"static":true},{"i":"widget13","widget":{"header":"default-header","body":"line-chart-view","title":"Read/Write spent time by file 
blocks"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_blk_read_write_time","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true,"row":8,"column":["date","value"]}},"x":24,"y":36,"static":true},{"i":"widget14","widget":{"header":"default-header","body":"label-view","title":" "},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Block"}},"x":0,"y":47,"static":true},{"i":"widget15","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Disk Blocks Read"},"w":48,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_blks_read","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":61,"static":true},{"i":"widget17","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Rows Inserted"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_tup_inserted","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":75,"static":true},{"i":"widget18","widget":{"header":"default-header","body":"line-chart-view","title":"Time Spent Reading Data File Blocks 
(ms)"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_blk_read_time","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":50,"static":true},{"i":"widget19","widget":{"header":"default-header","body":"line-chart-view","title":"Time Spent Writing Data File Blocks (ms)"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_blk_write_time","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":50,"static":true},{"i":"widget20","widget":{"header":"default-header","body":"label-view","title":" "},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Row"}},"x":0,"y":72,"static":true},{"i":"widget16","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Rows Updated"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_tup_updated","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":75,"static":true},{"i":"widget21","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Number of Transactions 
Committed"},"w":24,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_xact_commit","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":110,"static":true},{"i":"widget22","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Rows Returned"},"w":48,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_tup_returned","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":97,"static":true},{"i":"widget23","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Rows Fetched"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_tup_fetched","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":86,"static":true},{"i":"widget24","widget":{"header":"default-header","body":"line-chart-view","title":"Number of Rows Deleted"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_tup_deleted","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":86,"static":true},{"i":"widget25","widget":{"header":"default-header","body":"label-view","title":" 
"},"w":48,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":18,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Transaction"}},"x":0,"y":107,"static":true},{"i":"widget26","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Number of Transactions Rolled Back"},"w":24,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"pg_stat_database_xact_rollback","entityId":"","type":"PostgreSQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":110,"static":true}]', + 'Postgres', (select id from auth_resource3 where name='dashboard|admin|Postgres'), 'admin', 'admin', NULL, true); +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'), '2020-04-28 09:23:14.286', '2020-04-28 09:23:44.213', '[{"i":"widget0","widget":{"header":"default-header","body":"line-chart-view","title":"CPU Usage (%)","description":"Redis Pod에 대한 CPU 사용량"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_cpu_usage_core_by_workload","entityId":"redis-master,redis-slave","type":"workload","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":7,"static":true},{"i":"widget1","widget":{"header":"default-header","body":"line-chart-view","title":"Memory Usage 
(GiB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_memory_usage_bytes_by_workload","entityId":"redis-master,redis-slave","type":"workload","filter":""}},"visualization":{"showLegend":true}},"x":12,"y":7,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"line-chart-view","title":"Network Transmit (KB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_network_transmit_by_workload","entityId":"redis-master,redis-slave","type":"workload"}},"visualization":{"showLegend":true}},"x":24,"y":7,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"line-chart-view","title":"Network Recieved (KiB)"},"w":12,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"imxc","metricId":"container_network_receive_by_workload","entityId":"redis-master,redis-slave","type":"workload"}},"visualization":{"showLegend":true}},"x":36,"y":7,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"stat-view","title":"Redis Up Count"},"w":12,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_up","entityId":"","type":"Redis","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[{"color":"#32AC2D","value":"1"}]}},"x":0,"y":0,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"stat-view","title":"Connected Clients 
Count"},"w":12,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_connected_clients","entityId":"","type":"Redis","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":12,"y":0,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"stat-view","title":"Blocked Client Count"},"w":12,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_blocked_clients","entityId":"","type":"Redis","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":36,"y":0,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"horizontal-bar-chart-view","title":"Calls per command"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_commands_total","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true,"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":0,"y":26,"static":true},{"i":"widget8","widget":{"header":"default-header","body":"line-chart-view","title":"Command 
duration"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"redis_commands_duration_seconds","entityId":"","type":"Redis","filter":"","jspdCluster":null}},"visualization":{"showLegend":true}},"x":24,"y":26,"static":true},{"i":"widget9","widget":{"header":"default-header","body":"stat-view","title":"Rejected Client Count"},"w":12,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_rejected_connections","entityId":"","type":"Redis","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":0,"static":true},{"i":"widget10","widget":{"header":"default-header","body":"line-chart-view","title":"DB Key"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"redis_db_keys","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":37,"static":true},{"i":"widget11","widget":{"header":"default-header","body":"line-chart-view","title":"DB Expired Key"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"redis_expired_keys","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":37,"static":true},{"widget":{"header":"default-header","body":"stat-view","title":"Connected 
Slave"},"w":16,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"redis_connected_slaves","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true,"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"i":"widget12","static":true,"x":0,"y":17},{"widget":{"header":"default-header","body":"line-chart-view","title":"Slave Offset Bytes"},"w":16,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"redis_connected_slave_offset_bytes","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true}},"i":"widget13","static":true,"x":16,"y":17},{"widget":{"header":"default-header","body":"line-chart-view","title":"Slave Lag Seconds"},"w":16,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"redis_connected_slave_lag_seconds","entityId":"","type":"Redis","filter":""}},"visualization":{"showLegend":true}},"i":"widget14","static":true,"x":32,"y":17}]', + 'Redis', (select id from auth_resource3 where name='dashboard|admin|Redis'), 'admin', 'admin', NULL, true); +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'), '2020-04-28 09:23:14.286', '2020-04-28 09:23:44.213', '[{"i":"widget0","widget":{"header":"default-header","body":"stat-view","title":"4xx Error Per 
Seconds"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_4xx_error_rate_last_5m","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"rgba(255, 0, 0, 1)","thresholds":[],"showLegend":true}},"x":8,"y":0,"static":true},{"i":"widget1","widget":{"header":"default-header","body":"stat-view","title":"5xx Error Per Seconds"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_5xx_error_rate_last_5m","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"rgba(255, 0, 0, 1)","thresholds":[]}},"x":16,"y":0,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"line-chart-view","title":"Pod CPU Utilization (Core)"},"w":24,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"traefik","metricId":"container_cpu_usage_core_by_workload","entityId":"deploymentList,traefik","type":"workload"}},"visualization":{"showLegend":true}},"x":0,"y":7,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"stat-view","title":"Total Request Per 
Seconds"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_requests_rate_last_5m","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":0,"y":0,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"stat-view","title":"Bad Request Count (10m)"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_requests_bad_total_last_10m","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":null,"baseColor":"rgba(255, 0, 0, 1)","thresholds":[]}},"x":32,"y":0,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"stat-view","title":"Avg. 
Response Duration (10m)"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_response_time_ms","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"rgba(64, 158, 255, 1)","thresholds":[]}},"x":40,"y":0,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"stat-view","title":"Total Request Count (10m)"},"w":8,"h":7,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_requests_total_last_10m","entityId":"","type":"Traefik","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":0,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"line-chart-view","title":"Pod Memory Utilization (GiB)"},"w":24,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"traefik","metricId":"container_memory_working_set_bytes_by_workload","entityId":"deploymentList,traefik","type":"workload"}},"visualization":{"showLegend":true}},"x":24,"y":7,"static":true},{"widget":{"header":"default-header","body":"line-chart-view","title":"ENTRYPOINT - Open Connections"},"w":24,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_entrypoint_open_connections","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget8","static":true,"x":0,"y":26},{"widget":{"header":"default-header","body":"line-chart-view","title":"SERVICE - Open 
Connections"},"w":24,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_service_open_connections","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget9","static":true,"x":24,"y":26},{"widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Used Sockets"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_soket_used","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget10","static":true,"x":0,"y":35},{"widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Access to services / sec"},"w":24,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_requests_rate_by_service","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget11","static":true,"x":0,"y":16},{"widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Access to Entrypoints / sec"},"w":24,"h":10,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_http_requests_rate_by_entrypoint","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget12","static":true,"x":24,"y":16},{"widget":{"header":"default-header","body":"horizontal-bar-chart-view","title":"Horizontal Bar 
Chart"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"traefik_status_code_count","entityId":"","type":"Traefik","filter":""}},"visualization":{"showLegend":true}},"i":"widget13","static":true,"x":24,"y":35}]', + 'Traefik', (select id from auth_resource3 where name='dashboard|admin|Traefik'), 'admin', 'admin', NULL, true); +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'), '2020-04-28 09:23:14.286', '2020-04-28 09:23:44.213', '[{"i":"widget0","widget":{"header":"default-header","body":"stat-view","title":"Current QPS"},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_query_per_second","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":16,"y":0,"static":true},{"i":"widget1","widget":{"header":"default-header","body":"stat-view","title":"InnoDB Buffer Pool Size (GiB)"},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_Innodb_buffer_pool_size","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":8,"y":0,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"stat-view","title":"Number of Connection 
Attempts"},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_global_status_connections","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":40,"y":0,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"stat-view","title":"Max Connection","description":"Maximum permitted number of simultaneous client connections. By default, this is 151."},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_global_variables_max_connections","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":0,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"stat-view","title":"Number of Max Used Connection","description":"Maximum number of connections that have been in use simultaneously since the server started."},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_global_status_max_used_connections","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":32,"y":0,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"stat-view","title":"Instance Up 
Count"},"w":8,"h":8,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_up","entityId":"","type":"MySQL","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":0,"y":0,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"line-chart-view","title":"Buffer Cache Hit Ratio"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_cache_hit_ratio","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":19,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"line-chart-view","title":"Disk R/W Bytes"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_disk_io","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":8,"static":true},{"i":"widget8","widget":{"header":"default-header","body":"line-chart-view","title":"Buffer I/O Bytes"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_buffer_io","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":19,"static":true},{"i":"widget9","widget":{"header":"default-header","body":"line-chart-view","title":"Network Recieved / Transmit 
(KiB)"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_bytes_received_sent","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":8,"static":true},{"i":"widget10","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Number of Locks"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_status_locks","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":30,"static":true},{"i":"widget11","widget":{"header":"default-header","body":"line-chart-view","title":"MySQL Questions"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_global_status_questions","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":24,"y":30,"static":true},{"i":"widget12","widget":{"header":"default-header","body":"line-chart-view","title":"Line Chart"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_threads_total","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":41,"static":true},{"widget":{"header":"default-header","body":"horizontal-bar-chart-view","title":"Command 
Executed"},"w":24,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"mysql_global_status_commands_total","entityId":"","type":"MySQL","filter":""}},"visualization":{"showLegend":true}},"i":"widget13","static":true,"x":24,"y":41}]', + 'MySQL / MariaDB', (select id from auth_resource3 where name='dashboard|admin|MySQL / MariaDB'), 'admin', 'admin', NULL, true); +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'), '2020-04-28 09:23:14.286', '2020-04-28 09:23:44.213', '[{"i":"widget0","widget":{"header":"default-header","body":"stat-view","title":"Up"},"w":9,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_nginx_up","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":0,"y":0,"static":true},{"i":"widget1","widget":{"header":"default-header","body":"line-chart-view","title":"CPU Usage (%)"},"w":12,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"nginx-ingress","metricId":"container_cpu_usage_core_by_workload","entityId":"deploymentList,nginx-ingress-nginx-ingress","type":"workload"}},"visualization":{"showLegend":true}},"x":0,"y":9,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"line-chart-view","title":"Memory Usage 
(GiB)"},"w":12,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"nginx-ingress","metricId":"container_memory_usage_by_workload","entityId":"deploymentList,nginx-ingress-nginx-ingress","type":"workload"}},"visualization":{"showLegend":true}},"x":12,"y":9,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"stat-view","title":"Connected Clients Count"},"w":10,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_nginx_connections_active","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":9,"y":0,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"line-chart-view","title":"Network Transmit (KB)"},"w":12,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"nginx-ingress","metricId":"container_network_transmit_by_workload","entityId":"deploymentList,nginx-ingress-nginx-ingress","type":"workload"}},"visualization":{"showLegend":true}},"x":24,"y":9,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"line-chart-view","title":"Network Recieved (KiB)"},"w":12,"h":9,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"nginx-ingress","metricId":"container_network_receive_by_workload","entityId":"deploymentList,nginx-ingress-nginx-ingress","type":"workload"}},"visualization":{"showLegend":true}},"x":36,"y":9,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"stat-view","title":"Nginx Ingress last 
reload status"},"w":10,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_controller_nginx_last_reload_status","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[],"showLegend":true}},"x":19,"y":0,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"stat-view","title":"Nginx Ingress reload success total"},"w":10,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_controller_nginx_reloads_total","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":29,"y":0,"static":true},{"i":"widget8","widget":{"header":"default-header","body":"stat-view","title":"Nginx Ingress reload error total"},"w":9,"h":9,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_controller_nginx_reload_errors_total","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":null,"max":null,"decimals":2,"baseColor":"#409EFF","thresholds":[]}},"x":39,"y":0,"static":true},{"i":"widget10","widget":{"header":"default-header","body":"line-chart-view","title":"Nginx Ingress Http Request 
Seconds"},"w":48,"h":15,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_ingress_nginx_http_requests_second","entityId":"","type":"Nginx-Ingress","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":18,"static":true}]', + 'Nginx-Ingress', (select id from auth_resource3 where name='dashboard|admin|Nginx-Ingress'), 'admin', 'admin', NULL, true); + + + +-- Add tables for report e-mail delivery +CREATE TABLE public.report_group ( + id bigint NOT NULL, + created_by character varying(255), + created_date timestamp without time zone, + modified_by character varying(255), + modified_date timestamp without time zone, + users text, + name character varying(255) +); +ALTER TABLE public.report_group OWNER TO admin; + +ALTER TABLE ONLY public.report_group ADD CONSTRAINT report_group_pkey PRIMARY KEY (id); + +CREATE TABLE public.report_group_registry ( + id bigint NOT NULL, + report_template_id bigint NOT NULL, + report_group_id bigint NOT NULL +); + +ALTER TABLE public.report_group_registry OWNER TO admin; + +ALTER TABLE ONLY public.report_group_registry ADD CONSTRAINT report_group_registry_pkey PRIMARY KEY (id); +ALTER TABLE ONLY public.report_group_registry ADD CONSTRAINT report_group_registry_template_id_fk FOREIGN KEY (report_template_id) REFERENCES public.report_template(id); +ALTER TABLE ONLY public.report_group_registry ADD CONSTRAINT report_group_registry_group_id_fk FOREIGN KEY (report_group_id) REFERENCES public.report_group(id); + +-- CLOUD-13633; dashboard-related patch +-- Add metric meta for Traefik dashboard support +-- Auto-generated SQL script #202303161740 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_uptime','Traefik Uptime','Traefik Process Uptime','time() - process_start_time_seconds{job="cmoa-traefik"}','Instance','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); 
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_soket_used','Traefik Socket Used','Number of sockets Traefik is using','process_open_fds{job=~"cmoa-traefik"}','Instance','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_http_requests_rate_last_5m','Traefik HTTP Requests Rate','Number of http requests per second received by Traefik within 5 minutes','sum(rate(traefik_service_requests_total[5m]))','Request','Traefik',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik HTTP Request Rate :{{humanize $value}}|{threshold}.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_http_requests_rate_by_service','Traefik HTTP Requets Rate by Service','Number of http requests per second grouped by service within 5 minutes','sum(rate(traefik_service_requests_total[5m])) by (service)','Request','Traefik',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik HTTP Request Rate :{{humanize $value}}|{threshold}.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_http_requests_rate_by_entrypoint','Traefik HTTP Requets Rate by Entrypoint','Number of http requests per second grouped by entrypoint within 5 minutes','sum(rate(traefik_entrypoint_requests_total[5m])) by (entrypoint)','Request','Traefik',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_4xx_error_rate_last_5m','Traefik 4xx Error Rate','4xx error requests per second rate within 5 
minutes','sum(rate(traefik_service_requests_total{code=~"4[0-9]{2}"}[5m]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik 4xx Error Rate :{{humanize $value}}|{threshold}.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_5xx_error_rate_last_5m','Traefik 5xx Error Rate','5xx error requests per second rate within 5 minutes','sum(rate(traefik_service_requests_total{code=~"5[0-9]{2}"}[5m]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik 5xx Error Rate :{{humanize $value}}|{threshold}.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_4xx_total_last_10m','Traefik HTTP Requests 4xx Total Last 10m','Number of 4xx error requests in 10 minutes','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}"}[10m]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_5xx_total_last_10m','Traefik HTTP Requests 5xx Total Last 10m','Number of 5xx error requests in 10 minutes','sum(increase(traefik_service_requests_total{code=~"5[0-9]{2}"}[10m]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_4xx_total_last_1h','Traefik HTTP Requests 4xx Total Last 1h','Number of 4xx error requests in 1 hour','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}"}[1h]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); 
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_5xx_total_last_1h','Traefik HTTP Requests 5xx Total Last 1h','Number of 5xx error requests in 1 hour','sum(increase(traefik_service_requests_total{code=~"5[0-9]{2}"}[1h]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_http_response_time_ms','Traefik HTTP Response Time Milisecond','Traefik HTTP Response Time (ms)','sum(increase(traefik_service_request_duration_seconds_sum[5m])) / sum(increase(traefik_entrypoint_requests_total[5m])) * 1000','Response','Traefik',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_entrypoint_open_connections','Traefik Entrypoint Open Connections','The current count of open connections on an entrypoint','sum(traefik_entrypoint_open_connections) by (method)','Entrypoint','Traefik',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,in_use,message) + VALUES ('traefik_service_open_connections','Traefik Service Open Connections','The current count of open connections on an service','sum(traefik_service_open_connections) by (method)','Service','Traefik',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_total_last_10m','Traefik HTTP Requests Total Last 10m','Number requests in 10 minutes','sum(increase(traefik_service_requests_total{}[10m]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik Request Count (10m) :{{humanize $value}}|{threshold}.'); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('traefik_http_requests_total_last_1h','Traefik HTTP Requests Total Last 1h','Number requests in 1 hour','sum(increase(traefik_service_requests_total{}[1h]))','Request','Traefik','xm_clst_id, xm_namespace, xm_pod_id',true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik Request Count (1h) :{{humanize $value}}|{threshold}.'); + +-- Add metric meta for MySQL-MariaDB dashboard support +-- Auto-generated SQL script #202303201205 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) + VALUES ('mysql_query_per_second','Query Per Second','Based on the queries reported by MySQL''s SHOW STATUS command, it is the number of statements executed by the server within the last second.','irate(mysql_global_status_queries{}[5m])','Query','MySQL','instance',true,true,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} MySQL QPS :{{humanize $value}}%|{threshold}%.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('mysql_Innodb_buffer_pool_size','InnoDB Buffer Pool Size in GiB','Describes a storage area called the buffer pool for caching data and indexes in memory.','mysql_global_variables_innodb_buffer_pool_size / 1024 / 1024 / 1024','InnoDB','MySQL','instance',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('mysql_Innodb_buffer_pool_size_gib_with_limit','InnoDB Buffer Pool Size with Limit','Describes a storage area called the buffer pool for caching data and indexes in memory.','(mysql_global_variables_innodb_buffer_pool_size{} * 100) / on (xm_pod_id,xm_cont_name) imxc_kubernetes_container_resource_limit_memory{xm_pod_id!~"|POD", pod_state="Running"}','InnoDB','MySQL','instance',true,'None'); +INSERT INTO 
public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('mysql_global_status_questions','MySQL Questions','The number of statements executed by the server.','rate(mysql_global_status_questions{}[5m])','Query','MySQL','instance',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('mysql_global_status_max_used_connections','Max Used Connections','Maximum number of connections that have been in use simultaneously since the server started.','mysql_global_status_max_used_connections','Connection','MySQL','instance',true,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,message) + VALUES ('mysql_global_variables_max_connections','Max Connections','Maximum permitted number of simultaneous client connections. By default, this is 151.','mysql_global_variables_max_connections','Connection','MySQL','instance',true,'None'); + +-- Auto-generated SQL script #202303201412 +UPDATE public.metric_meta2 + SET expr='max(max_over_time(mysql_global_status_threads_connected[5m]) or mysql_global_status_threads_connected )' + WHERE id='mysql_global_status_connections'; +UPDATE public.metric_meta2 + SET groupby_keys='instance' + WHERE id='mysql_global_status_connections'; +-- Auto-generated SQL script #202303210950 +DELETE FROM public.metric_meta2 + WHERE id='mysql_global_status_cache_hit_ratio'; + +-- Auto-generated SQL script #202303210950 +UPDATE public.metric_meta2 +SET expr = e'sum by (data_type, xm_clst_id, xm_namespace, xm_node_id, instance) ( +label_replace(irate(mysql_global_status_innodb_buffer_pool_write_requests[5m]), "data_type", "write", "", "") or +label_replace(irate(mysql_global_status_innodb_buffer_pool_read_requests[5m]), "data_type", "read", "", "") )' +WHERE id LIKE 'mysql#_buffer#_io' ESCAPE '#'; + +-- Auto-generated SQL script #202303210950 +UPDATE 
public.metric_meta2 +SET expr = e'sum by (data_type, xm_clst_id, xm_namespace, xm_node_id, instance) ( + label_replace(rate(mysql_global_status_bytes_received [1m])/1024, "data_type", "received", "", "") or + label_replace(rate(mysql_global_status_bytes_sent [1m])/1024, "data_type", "sent", "", ""))' +WHERE id LIKE 'mysql#_bytes#_received#_sent' ESCAPE '#'; + +-- Add metric meta for Nginx Ingress dashboard support +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_nginx_up','Nginx Ingress Up','Nginx Ingress Up','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_up)','Pod','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_nginx_connections_active','Nginx Ingress Connection Active','Nginx Ingress Client Connection Active','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_connections_active)','Connection','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_controller_nginx_last_reload_status','Nginx Ingress last reload status','Status of the last NGINX reload','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_last_reload_status)','Config','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_controller_nginx_reload_errors_total','Nginx Ingress reload error total','Number of unsuccessful NGINX reloads','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) 
(nginx_ingress_controller_nginx_reload_errors_total)','Config','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_controller_nginx_reloads_total','Nginx Ingress reload success total','Number of successful NGINX reloads','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_reloads_total)','Config','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_nginx_http_requests_total','Nginx Ingress Http Request Total','Total http requests','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_http_requests_total)','Request','Nginx-Ingress','',true,false,'None'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_ingress_nginx_http_requests_second','Nginx Ingress Http Request Seconds','Http request per second','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (rate(nginx_ingress_nginx_http_requests_total[1m]))','Request','Nginx-Ingress','',true,false,'None'); diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.1.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.1.psql new file mode 100644 index 0000000..cc442f9 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.1.psql @@ -0,0 +1,48 @@ + +-- CLOUD-16405 Metric Meta | Fix dashboard-related metrics +-- Add dashboard-related metrics and fix typos +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES + ('redis_connected_slave_lag_seconds','Redis Connected Slave Lag Seconds','Redis Connected Slave Lag Seconds','sum by (xm_clst_id, xm_namespace, 
xm_node_id, instance, slave_ip, slave_port) (rate(redis_connected_slave_lag_seconds[1m]))','Worker','Redis',NULL,true,true,'Redis Connected Slave Lag Seconds','2023-03-16 14:39:57.420','2023-03-16 14:39:57.420'), + ('redis_connected_slave_offset_bytes','Redis Connected Slave Offset Bytes','Redis Connected Slave Offset Bytes','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, slave_ip, slave_port) (rate(redis_connected_slave_offset_bytes[1m]))','Worker','Redis',NULL,true,true,'Redis Connected Slave Offset Bytes','2023-03-16 14:37:43.734','2023-03-16 14:38:22.164'), + ('redis_connected_slaves','Redis Connected Slaves','Redis Connected Slaves','sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (redis_connected_slaves)','Worker','Redis',NULL,true,true,'Redis Connected Slaves','2023-03-16 13:57:09.423','2023-03-16 13:59:50.746'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES + ('traefik_http_requests_bad_total_last_10m','Traefik HTTP Requests Bad Total Last 10m','Number bad requests in 10 minutes','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}|5[0-9]{2}"}[10m]))','Request','Traefik','service',true,false,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik Bad Request Count (10m) :{{humanize $value}}|{threshold}.','2023-03-17 14:10:13.163','2023-03-17 14:10:13.163'), + ('traefik_http_requests_bad_total_last_1h','Traefik HTTP Requests Bad Total Last 1h','Number bad requests in 1 hour','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}|5[0-9]{2}"}[1h]))','Request','Traefik','service',true,false,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Trafik Bad Request Count (1h) :{{humanize $value}}|{threshold}.','2023-03-17 14:10:13.163','2023-03-17 14:10:13.163'); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES + ('traefik_status_code_count','Traefik Status Code Count ','Traefik Status Code Count ','sum(increase(traefik_service_requests_total{protocol=~"http|https"}[10m])) by (code)','Request','Traefik','code',true,false,'None','2023-03-17 14:33:13.020','2023-03-17 14:33:13.020'); +-- Auto-generated SQL script #202305081122 +UPDATE public.metric_meta2 + SET meta_name='Traefik HTTP Requests 5xx Total Last 1h' + WHERE id='traefik_http_requests_5xx_total_last_1h'; + +-- CLOUD-16467 Metric Meta | Add a table that stores Secret information. +CREATE TABLE cmoa_secret_base ( + kube_flatting_time bigint not null, + cluster_id varchar(255) not null, + kind varchar(30) not null, + metadata_uid varchar(40) not null, + row_index integer not null, + metadata_name text, + kind_status varchar(50), + metadata_resourceversion text, + metadata_annotations text, + metadata_creationtimestamp varchar(25), + metadata_labels text, + metadata_namespace text, + data text, + type text, + create_time timestamp default now(), + PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index) +); + +ALTER TABLE cmoa_pod_volume + add spec_volumes_persistentvolumeclaim text; + +-- CLOUD-16396 Initialize menu entries for the scale/yaml permissions in Setting>Deploy List +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('menu|Workloads|Deploy List|Scale', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('menu|Workloads|Deploy List|Yaml', false, null); +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('menu|Workloads|Deploy List|List', false, null); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) VALUES (13, 'deployList(List)', NULL, 5, '', (select id from auth_resource3 where name='menu|Workloads|Deploy List|List'), 0); +INSERT INTO public.menu_meta (id, 
description, icon, "position", url, auth_resource3_id, scope_level) VALUES (14, 'deployList(Scale)', NULL, 6, '', (select id from auth_resource3 where name='menu|Workloads|Deploy List|Scale'), 0); +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) VALUES (15, 'deployList(Yaml)', NULL, 7, '', (select id from auth_resource3 where name='menu|Workloads|Deploy List|Yaml'), 0); \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.2.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.2.psql new file mode 100644 index 0000000..e078e49 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.2.psql @@ -0,0 +1,72 @@ +-- Create a view for displaying Job status +CREATE VIEW v_cmoa_job AS + select Q.kube_flatting_time, Q.cluster_id, Q.metadata_ownerreferences_kind, Q.metadata_uid, K.status_phase + from (select A.kube_flatting_time as job_flatting_time, A.metadata_uid as job_metadata_uid, B.kube_flatting_time as pod_flatting_time + from (select metadata_uid, max(kube_flatting_time) as kube_flatting_time + from cmoa_job_base + where kind='Job' and kube_flatting_time > extract(epoch from (CURRENT_TIMESTAMP - INTERVAL '1 hours'))*1000 + group by metadata_uid) A, + (select metadata_ownerreferences_uid, max(kube_flatting_time) as kube_flatting_time + from cmoa_pod_base + where metadata_ownerreferences_kind='Job' and kube_flatting_time > extract(epoch from (CURRENT_TIMESTAMP - INTERVAL '1 hours'))*1000 + group by metadata_ownerreferences_uid) B + where A.metadata_uid = B.metadata_ownerreferences_uid) J + inner join cmoa_job_base Q on Q.metadata_uid = J.job_metadata_uid and Q.kube_flatting_time = J.job_flatting_time + inner join cmoa_pod_base K on K.metadata_ownerreferences_uid = J.job_metadata_uid and K.kube_flatting_time = J.pod_flatting_time + group by Q.kube_flatting_time, Q.cluster_id, Q.metadata_ownerreferences_kind, Q.metadata_uid, K.status_phase; + +-- CLOUD-17051 | 
Dashboard > Redis 관련 이슈 대응 +-- Auto-generated SQL script #202306011616 +UPDATE public.metric_meta2 + SET expr='sum by (data_type, xm_clst_id, xm_namespace, xm_node_id, instance) ( +label_replace(rate(redis_cpu_sys_seconds_total[1m]), "data_type", "system", "", "") or +label_replace(rate(redis_cpu_user_seconds_total[1m]), "data_type", "user", "", "") )' + WHERE id='redis_cpu_usage'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (rate(redis_cpu_sys_children_seconds_total[1m]))' + WHERE id='redis_used_cpu_sys_children'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (rate(redis_cpu_user_seconds_total[1m]))' + WHERE id='redis_used_cpu_user'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (rate(redis_cpu_user_children_seconds_total [1m]))' + WHERE id='redis_used_cpu_user_children'; +UPDATE public.metric_meta2 + SET expr='sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (rate(redis_cpu_sys_seconds_total[1m]))' + WHERE id='redis_used_cpu_sys'; + +-- CLOUD-17354 | Add NginX-related metric meta +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_up','Nginx Up','Nginx Up Time','sum by (instance) (nginx_up)','State','Nginx','','true','false','"Nginx Up Time : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_connections_accepted','Nginx Accepted Connection Count','Accepted connection count of Nginx','sum by (instance) (irate(nginx_connections_accepted[5m]))','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) 
VALUES('nginx_connections_handled','Nginx Handled Connection Count','Handled connection count of Nginx','sum by (instance) (irate(nginx_connections_handled[5m]))','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_connections_active','Nginx Active Connection Count','Active connection count of Nginx','sum by (instance) (nginx_connections_active)','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_connections_reading','Nginx Reading Connection Count','Reading connection count of Nginx','sum by (instance) (nginx_connections_reading)','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_connections_waiting','Nginx Waiting Connection Count','Waiting connection count of Nginx','sum by (instance) (nginx_connections_waiting)','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_connections_writing','Nginx Writing Connection Count','Writing connection count of NginX','sum by (instance) (nginx_connections_writing)','Connection','Nginx','','true','false','"Nginx Connection : {{$value}} , Threshold : {threshold}"'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('nginx_http_requests_total','Nginx Http Requests','Http 
request count of Nginx','sum by (instance) (irate(nginx_http_requests_total[1m]))','Request','Nginx','','true','false','"Http requests : {{$value}} , Threshold : {threshold}"'); + +-- CLOUD-17354 | NginX Dashboard 추가 +INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id) VALUES ('dashboard|admin|Nginx', false, null); +INSERT INTO public.dashboard2 (id, created_date, modified_date, layout, title, auth_resource_id, created_by, modified_by, description, "share") + VALUES(nextval('hibernate_sequence'),'2023-06-28 15:16:36.511000','2023-06-29 09:33:06.665000','[{"i":"widget1","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Accepted Connections","description":"Accepted connection count of Nginx"},"w":18,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_accepted","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":30,"y":11,"static":true},{"i":"widget2","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Handled Connections","description":"Handled connection count of Nginx"},"w":18,"h":14,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_handled","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":30,"y":22,"static":true},{"i":"widget3","widget":{"header":"default-header","body":"line-chart-view","title":"Active Connections","description":"Active connection count of 
Nginx"},"w":12,"h":14,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_active","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":8,"static":true},{"i":"widget4","widget":{"header":"default-header","body":"line-chart-view","title":"Waiting Connections","description":"Waiting connection count of Nginx"},"w":12,"h":14,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_waiting","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":12,"y":8,"static":true},{"i":"widget5","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Reading Connections","description":"Reading connection count of Nginx"},"w":12,"h":14,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_reading","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":0,"y":22,"static":true},{"i":"widget6","widget":{"header":"default-header","body":"stack-bar-chart-view","title":"Writing Connection","description":"Writing connection count of Nginx"},"w":12,"h":14,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_writing","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":12,"y":22,"static":true},{"i":"widget7","widget":{"header":"default-header","body":"label-view","title":" 
"},"w":24,"h":3,"minW":2,"minH":1,"maxW":48,"maxH":36,"component":{"api":{},"visualization":{"background":null,"fontSize":20,"textAlign":"left","padding":[0,18],"sideways":false,"fontWeight":"bold","text":"Nginx Information"}},"x":0,"y":0,"static":true},{"i":"widget0","widget":{"header":"default-header","body":"stat-view","title":"Active"},"w":6,"h":5,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_active","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":0,"y":3,"static":true},{"i":"widget8","widget":{"header":"default-header","body":"stat-view","title":"Waiting"},"w":6,"h":5,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_waiting","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":6,"y":3,"static":true},{"i":"widget9","widget":{"header":"default-header","body":"stat-view","title":"Reading"},"w":6,"h":5,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_reading","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":12,"y":3,"static":true},{"i":"widget10","widget":{"header":"default-header","body":"stat-view","title":"Writing"},"w":6,"h":5,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":""
,"metricId":"nginx_connections_writing","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":18,"y":3,"static":true},{"i":"widget11","widget":{"header":"default-header","body":"line-chart-view","title":"Http Request"},"w":18,"h":11,"minW":8,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":false,"range":true,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_http_requests_total","entityId":"","type":"Nginx","filter":""}},"visualization":{"showLegend":true}},"x":30,"y":0,"static":true},{"i":"widget12","widget":{"header":"default-header","body":"stat-view","title":"Http Request"},"w":6,"h":11,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_http_requests_total","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":0,"static":true},{"i":"widget13","widget":{"header":"default-header","body":"stat-view","title":"Accepted Count"},"w":6,"h":11,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_accepted","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":11,"static":true},{"i":"widget14","widget":{"header":"default-header","body":"stat-view","title":"Handled 
Count"},"w":6,"h":14,"minW":6,"minH":4,"maxW":48,"maxH":18,"component":{"api":{"uri":"metric.chart","params":{"unique":true,"range":false,"clusterId":"cloudmoa","namespace":"","metricId":"nginx_connections_handled","entityId":"","type":"Nginx","filter":""}},"visualization":{"showGauge":false,"showPercent":false,"min":"0","max":null,"decimals":null,"baseColor":"#409EFF","thresholds":[]}},"x":24,"y":22,"static":true}]', + 'Nginx Dashboard',(select id from auth_resource3 where name='dashboard|admin|Nginx'),'admin','admin','Nginx Dashboard',true); + +-- CLOUD-17326 | Kube Event 관련 metric meta 추가 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_node','Kubernetes Node Event','Kubernetes Node Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Node"})','Event','Node','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_job','Kubernetes Job Event','Kubernetes Job Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Job"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_cronjob','Kubernetes CronJob Event','Kubernetes CronJob Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) 
(imxc_kubernetes_event_in_last_min{involved_kind="CronJob"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_daemonset','Kubernetes DaemonSet Event','Kubernetes DaemonSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="DaemonSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_deployment','Kubernetes Deployment Event','Kubernetes Deployment Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Deployment"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_endpoints','Kubernetes Endpoints Event','Kubernetes Endpoints Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Endpoints"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_persistentvolumeclaim','Kubernetes PersistentVolumeClaim 
Event','Kubernetes PersistentVolumeClaim Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="PersistentVolumeClaim"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_pod','Kubernetes Pod Event','Kubernetes Pod Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Pod"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_warning','Kubernetes Warning Event','Kubernetes Warning Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{type="Warning"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Name: {{$labels.involved_name}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_replicaset','Kubernetes ReplicaSet Event','Kubernetes ReplicaSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="ReplicaSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_statefulset','Kubernetes StatefulSet Event','Kubernetes StatefulSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="StatefulSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); + +-- Add metric meta for container CPU/Memory usage against limit. NOTE(review): the CPU expr takes rate() of imxc_kubernetes_container_resource_limit_cpu (a limit metric) rather than a usage counter - confirm intended formula. +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('container_cpu_usage_against_limit','Container CPU Usage Against Limits (%)','Container CPU Usage Against Limits ( % ) ','sum by(xm_clst_id,xm_namespace,owner_name,xm_pod_id,xm_cont_name,xm_entity_type) (rate(imxc_kubernetes_container_resource_limit_cpu{xm_cont_name!="POD"} [1m])+ on (xm_clst_id,xm_namespace,xm_pod_id,xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running",pod_state="Running",{filter}}) without (instance)) * 0)','CPU','Workload','','true','false','CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU Usage (%):{{humanize $value}}%|{threshold}%.'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('container_memory_usage_against_limit','Container Memory Usage Against Limit','Container Memory Usage Against Limit','sum by(xm_clst_id,xm_namespace,xm_pod_id,xm_cont_name,xm_entity_type,owner_name) (container_memory_usage_bytes{xm_cont_name!="POD"} / on (xm_clst_id,xm_namespace,xm_cont_name,xm_pod_id) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_memory{container_state="Running",pod_state="Running",xm_cont_name!="POD",{filter}}) without 
(instance))) * 100','Memory','Workload','','true','false','CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Memory Limit Against Usage:{{humanize $value}}%|{threshold}%.'); + + +commit; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.3.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.3.psql new file mode 100644 index 0000000..d20f05e --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.3.psql @@ -0,0 +1,385 @@ +-- CLOUD-19295 | Host Process 관련 metric meta 추가 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES ('host_process_cpu_second_total','Host Process CPU Second Total (%)','CPU user usage in seconds ( % )','sum by (groupname) (rate(namedprocess_namegroup_cpu_seconds_total{{filter}}[1m]))*100','Process','Host','','true','false','"Host:{{$labels.instance}} Process CPU :{{humanize $value}}|{threshold}."'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_process_memory_bytes','Host Process memory bytes (MiB)','number of bytes of memory in use / 1048576','sum by (groupname) ((namedprocess_namegroup_memory_bytes{ memtype="resident", {filter}})) / 1048576','Process','Host','','true','false','"Host:{{$labels.instance}} Process memory :{{humanize $value}}MiB|{threshold}MiB."'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_process_read_bytes','Host Process read bytes ( KiB)','number of bytes read by this group /1024','sum by (groupname) (rate(namedprocess_namegroup_read_bytes_total{{filter}}[1m]))/1024','Process','Host','','true','false','"Host:{{$labels.instance}} Disk Read Size:{{humanize $value}}KiB|{threshold}KiB."'); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_process_write_bytes','Host Process write bytes(KiB)','number of bytes written by this group/1024','sum by (groupname) (rate(namedprocess_namegroup_write_bytes_total{{filter}}[1m]))/1024','Process','Host','','true','false','"Host:{{$labels.instance}} Disk Write Size:{{humanize $value}}KiB|{threshold}KiB."'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_process_count','Host Process number','number of processes in this group', 'sum by (groupname) (namedprocess_namegroup_num_procs{{filter}})', 'Process','Host','','true','false','"Host:{{$labels.instance}} Count:{{humanize $value}}|{threshold}."'); + +-- CLOUD-19283 Topology Agent 와 node exporter 통합 +-- Auto-generated SQL script #202307261024 +UPDATE public.agent_install_file_info + SET yaml='---',use_yn=false + WHERE id=4; +UPDATE public.agent_install_file_info + SET yaml='--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudmoa-cluster-role +rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + 
- list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE +subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE +roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE +spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cloudmoa-topology-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-topology-agent +spec: + selector: + matchLabels: + app: 
cloudmoa-topology-agent + template: + metadata: + labels: + app: cloudmoa-topology-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/node-exporter + name: node-agent + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 125m + memory: 90Mi + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + env: + - name: GOMAXPROCS + value: "1" + volumeMounts: + - mountPath: /host/proc + name: proc-volume + readOnly: false + - mountPath: /host/sys + name: sys-volume + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root-volume + readOnly: true + - name: cloudmoa-topology-agent + image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: DATAGATE + value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG_LEVEL + value: "INFO" + volumes: + - name: bin-volume + 
hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - hostPath: + path: /sys + name: sys-volume + - name: log-volume + hostPath: + path: /home' + WHERE id=2; + + +-- CLOUD-19460 Add {filter} to Nginx metrics +-- Metric meta updates for Nginx Ingress dashboard support +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_up{ {filter} })' WHERE id ='nginx_ingress_nginx_up'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_connections_active{ {filter} })' WHERE id ='nginx_ingress_nginx_connections_active'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_last_reload_status{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_last_reload_status'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_reload_errors_total{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_reload_errors_total'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_reloads_total{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_reloads_total'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_http_requests_total{ {filter} })' WHERE id ='nginx_ingress_nginx_http_requests_total'; +UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (rate(nginx_ingress_nginx_http_requests_total{ {filter} }[1m]))' WHERE id ='nginx_ingress_nginx_http_requests_second'; + +-- Metric meta updates for Nginx dashboard support +UPDATE public.metric_meta2 SET expr = 'sum by 
(instance) (nginx_up{ {filter} })' WHERE id ='nginx_up'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (irate(nginx_connections_accepted{ {filter} }[5m]))' WHERE id ='nginx_connections_accepted'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (irate(nginx_connections_handled{ {filter} }[5m]))' WHERE id ='nginx_connections_handled'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (nginx_connections_active{ {filter} })' WHERE id ='nginx_connections_active'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (nginx_connections_reading{ {filter} })' WHERE id ='nginx_connections_reading'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (nginx_connections_waiting{ {filter} })' WHERE id ='nginx_connections_waiting'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (nginx_connections_writing{ {filter} })' WHERE id ='nginx_connections_writing'; +UPDATE public.metric_meta2 SET expr = 'sum by (instance) (irate(nginx_http_requests_total{ {filter} }[1m]))' WHERE id ='nginx_http_requests_total'; + +-- Kube Event 관련 metric meta 추가 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_node','Kubernetes Node Event','Kubernetes Node Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Node"})','Event','Node','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_job','Kubernetes Job Event','Kubernetes Job Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) 
(imxc_kubernetes_event_in_last_min{involved_kind="Job"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_cronjob','Kubernetes CronJob Event','Kubernetes CronJob Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="CronJob"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_daemonset','Kubernetes DaemonSet Event','Kubernetes DaemonSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="DaemonSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_deployment','Kubernetes Deployment Event','Kubernetes Deployment Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Deployment"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_endpoints','Kubernetes Endpoints Event','Kubernetes Endpoints 
Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Endpoints"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_persistentvolumeclaim','Kubernetes PersistentVolumeClaim Event','Kubernetes PersistentVolumeClaim Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="PersistentVolumeClaim"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_pod','Kubernetes Pod Event','Kubernetes Pod Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Pod"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_warning','Kubernetes Warning Event','Kubernetes Warning Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{type="Warning"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Name: {{$labels.involved_name}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_replicaset','Kubernetes ReplicaSet Event','Kubernetes ReplicaSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="ReplicaSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_statefulset','Kubernetes StatefulSet Event','Kubernetes StatefulSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="StatefulSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); + +-- Event Alert 관련 metric meta 추가 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_successful_create','Event Alert SuccessfulCreate','Event Alert SuccessfulCreate','count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{reason=~"SuccessfulCreate", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_successful_delete','Event Alert SuccessfulDelete','Event Alert SuccessfulDelete','count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) 
(imxc_kubernetes_event_in_last_min{reason=~"SuccessfulDelete", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_deadline_exceeded','Event Alert DeadlineExceeded','Event Alert DeadlineExceeded','count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{reason=~"DeadlineExceeded", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_job','Event Alert Job','Event Alert Job','count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{involved_kind=~"Job", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_cronjob','Event Alert CronJob','Event Alert CronJob','count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{involved_kind=~"CronJob", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_normal','Event Alert Normal','Event Alert Normal','count by (xm_clst_id, xm_namespace, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{type=~"Normal", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('event_alert_warning','Event Alert Warning','Event Alert Warning','count by (xm_clst_id, xm_namespace, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{type=~"Warning", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '); + +-- node-export 부분 node-agent 변경 +UPDATE public.metric_meta2 SET expr = '(100 - (avg by (xm_clst_id, xm_node_id, xm_entity_type)(clamp_max(rate(node_cpu_seconds_total{ name="node-agent", mode="idle", xm_entity_type="Node", {filter} }[1m]),1.0) * 100)))' WHERE id ='node_cpu_usage'; +UPDATE public.metric_meta2 SET expr = '(100 - (avg by (xm_clst_id)(clamp_max(rate(node_cpu_seconds_total{ name="node-agent", mode="idle", xm_entity_type="Node", {filter} }[1m]),1.0)) * 100))' WHERE id ='cluster_cpu_usage'; +UPDATE public.metric_meta2 SET expr = '(100 - (avg by (xm_clst_id, xm_node_id) (clamp_max(rate(node_cpu_seconds_total{name="node-agent", mode="idle", xm_entity_type="Node", {filter}}[1m]),1.0)) * 100)) * sum by(xm_clst_id, xm_node_id)(imxc_kubernetes_node_resource_capacity_cpu{{filter}}) / 100' WHERE id ='node_cpu_used'; +UPDATE public.metric_meta2 SET expr = 'avg by (xm_clst_id, xm_node_id, xm_entity_type) 
(rate(node_cpu_seconds_total{name="node-agent", mode="iowait", xm_entity_type="Node" , {filter}}[1m])) * 100' WHERE id ='node_cpu_iowait'; +UPDATE public.alert_rule_meta SET expr = '(100 - (avg by (xm_clst_id, xm_node_id, xm_entity_type) (rate(node_cpu_seconds_total{ name=''node-agent'', mode=''idle'', xm_entity_type=''Node'', {filter} }[1m])) * 100))' WHERE id = 97; +UPDATE public.agent_install_file_info SET name = 'node-agent' WHERE id=4; +commit; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.4.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.4.psql new file mode 100644 index 0000000..b080944 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_3.5.4.psql @@ -0,0 +1,68 @@ +-- DeploymentConfig 지원을 위한 Table 추가 +CREATE TABLE cmoa_deploymentconfig_base( + kube_flatting_time bigint, + cluster_id varchar(255), + kind varchar(30), + metadata_uid varchar(40), + row_index int, + kind_status varchar(50), + metadata_creationTimestamp varchar(25), + metadata_name text, + metadata_namespace text, + metadata_resourceVersion text, + spec_replicas text, + spec_template_spec_containers_image text, + spec_template_metadata_labels text, + status_availableReplicas text, + status_updatedReplicas text, + status_replicas text, + status_unavailableReplicas text, + create_time timestamp default now(), + PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index) +); + +-- ReplicationController 지원을 위한 Table 추가 +CREATE TABLE cmoa_replicationcontroller_base ( + kube_flatting_time bigint, + cluster_id varchar(255), + kind varchar(30), + metadata_uid varchar(40), + row_index int, + kind_status varchar(50), + metadata_annotations text, + metadata_creationtimestamp varchar(25), + metadata_labels text, + metadata_name text, + metadata_namespace text, + metadata_resourceversion text, + spec_replicas text, + status_availablereplicas text, + status_readyreplicas text, + status_replicas text, + status_observedgeneration text, + 
create_time timestamp default now(), + PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index) +); + +create table public.audit_log +( + id bigserial + constraint audit_log_pk + primary key, + created_date timestamp not null, + user_id varchar not null, + type varchar not null, + menu_path varchar not null, + result boolean not null, + target jsonb +); + + + +-- DeploymentConfig Event 관련 metric meta 추가 +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_deploymentconfig','Kubernetes DeploymentConfig Event','Kubernetes DeploymentConfig Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="DeploymentConfig", {filter} })','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_count','Host Count','Host Count','count(node_boot_time_seconds{is_host="true"})','Host','System','','true','false','Host Count : {{humanize $value}}'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_alive','Host Alive','Host Alive','count(node_boot_time_seconds{is_host="true", {filter}})','Host','System','','true','false','Host Alive : {{$labels.instance}}'); +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('host_dead','Host Dead','Host Dead','absent(node_boot_time_seconds{is_host="true", {filter}})','Host','System','','true','false','Host Dead : {{$labels.instance}}'); + +commit; diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210503.psql 
b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210503.psql new file mode 100644 index 0000000..99d1dbe --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210503.psql @@ -0,0 +1,2844 @@ +ALTER TABLE alert_rule ADD COLUMN IF NOT EXISTS warning_sign character VARYING(255); +ALTER TABLE alert_rule ADD COLUMN IF NOT EXISTS critical_sign character VARYING(255); + +CREATE TABLE IF NOT EXISTS public.license_policy ( + policy_id character varying(255) NOT NULL, + policy_desc character varying(255), + term_year integer NOT NULL, + term_month integer NOT NULL, + term_day integer NOT NULL, + license_type character varying(255) NOT NULL, + allowable_range character varying(255) NOT NULL, + storage_capacity character varying(255) NOT NULL, + cluster_count character varying(255) NOT NULL, + node_count character varying(255) NOT NULL, + pod_count character varying(255) NOT NULL, + service_count character varying(255) NOT NULL, + core_count character varying(255) NOT NULL, + host_ids character varying(255) NOT NULL, + user_division character varying(255) NOT NULL, + created_date timestamp without time zone, + modified_date timestamp without time zone +); + +ALTER TABLE ONLY public.license_policy + ADD CONSTRAINT license_policy_pkey PRIMARY KEY (policy_id); + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('node_contextswitch_and_filedescriptor','Node contextswitch and filedescriptor','Node contextswitch and filedescriptor','sum by(xm_clst_id, xm_node_id, data_type) ( + label_replace(node_filefd_allocated {{filter}}, "data_type", "file descriptor" , "", "") or + label_replace(rate(node_context_switches_total {{filter}}[1m]), "data_type", "context switches", "" , ""))','File','Node',NULL,false,false,'Node contextswitch and filedescriptor','2020-05-28 12:38:21.587','2020-05-28 12:38:21.587') + ON CONFLICT (id) + DO + UPDATE SET 
(id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('node_contextswitch_and_filedescriptor','Node contextswitch and filedescriptor','Node contextswitch and filedescriptor','sum by(xm_clst_id, xm_node_id, data_type) ( + label_replace(node_filefd_allocated {{filter}}, "data_type", "file descriptor" , "", "") or + label_replace(rate(node_context_switches_total {{filter}}[1m]), "data_type", "context switches", "" , ""))','File','Node',NULL,false,false,'Node contextswitch and filedescriptor','2020-05-28 12:38:21.587','2020-05-28 12:38:21.587') + WHERE public.metric_meta2.id = 'node_contextswitch_and_filedescriptor'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_user_by_workload', 'Container CPU User By workload (%)', 'Container CPU Usage(User)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU User (%):{{humanize $value}}%|{threshold}%.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_user_by_workload', 'Container CPU User By workload (%)', 'Container CPU Usage(User)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) 
imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU User (%):{{humanize $value}}%|{threshold}%.', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_user_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_system_core_by_workload', 'Container CPU System By workload (Core)', 'Container CPU(Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU System (Core) (System):{{humanize $value}}%|{threshold}%.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_system_core_by_workload', 'Container CPU System By workload (Core)', 'Container CPU(Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU System (Core) (System):{{humanize $value}}%|{threshold}%.', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_system_core_by_workload'; + + +INSERT INTO 
public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_usage_core_by_workload', 'Container CPU Usage By workload (Core)', 'Container CPU Usage (Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU Usage (Core):{{humanize $value}}|{threshold}.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_usage_core_by_workload', 'Container CPU Usage By workload (Core)', 'Container CPU Usage (Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU Usage (Core):{{humanize $value}}|{threshold}.', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_usage_core_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_user_core_by_workload', 'Container CPU User By workload (Core)', 'Container CPU Usage (User)(Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) 
(rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU User (Core):{{humanize $value}}|{threshold}.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_user_core_by_workload', 'Container CPU User By workload (Core)', 'Container CPU Usage (User)(Core)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)', 'CPU', 'Workload', NULL, TRUE, FALSE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU User (Core):{{humanize $value}}|{threshold}.', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_user_core_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_system_by_workload', 'Container CPU System By workload (%)', 'Container CPU Usage (System)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU System 
(%):{{humanize $value}}%|{threshold}%.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_system_by_workload', 'Container CPU System By workload (%)', 'Container CPU Usage (System)', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU System (%):{{humanize $value}}%|{threshold}%.', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_system_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_cpu_usage_by_workload', 'Container CPU Usage By workload (%)', 'Container CPU Usage', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU Usage (%):{{humanize $value}}%|{threshold}%', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_cpu_usage_by_workload', 'Container CPU Usage By workload (%)', 'Container CPU Usage', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) 
(rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100', 'CPU', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} CPU Usage (%):{{humanize $value}}%|{threshold}%', now(), now()) + WHERE public.metric_meta2.id = 'container_cpu_usage_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_fs_reads_by_workload', 'Container Filesystem Read Bytes By workload (KiB)', 'Cumulative count of bytes read / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_fs_reads_bytes_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Reads:{{humanize $value}}KiB|{threshold}KiB.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_fs_reads_by_workload', 'Container Filesystem Read Bytes By workload (KiB)', 'Cumulative count of bytes read / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_fs_reads_bytes_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} 
CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Reads:{{humanize $value}}KiB|{threshold}KiB.', now(), now()) + WHERE public.metric_meta2.id = 'container_fs_reads_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_fs_limit_bytes_by_workload', 'Container Filesystem Limit Bytes By workload (GiB)', 'Number of bytes that can be consumed by the container on this filesystem / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_fs_limit_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Limit:{{humanize $value}}GiB|{threshold}GiB.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_fs_limit_bytes_by_workload', 'Container Filesystem Limit Bytes By workload (GiB)', 'Number of bytes that can be consumed by the container on this filesystem / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_fs_limit_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Limit:{{humanize $value}}GiB|{threshold}GiB.', now(), now()) + WHERE public.metric_meta2.id = 'container_fs_limit_bytes_by_workload'; + + +INSERT INTO 
public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_fs_usage_bytes_by_workload', 'Container Filesystem Used Bytes By workload (GiB)', 'Number of bytes that are consumed by the container on this filesystem / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_fs_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Used:{{humanize $value}}GiB||{threshold}GiB.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_fs_usage_bytes_by_workload', 'Container Filesystem Used Bytes By workload (GiB)', 'Number of bytes that are consumed by the container on this filesystem / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_fs_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Used:{{humanize $value}}GiB||{threshold}GiB.', now(), now()) + WHERE public.metric_meta2.id = 'container_fs_usage_bytes_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_fs_writes_by_workload', 'Container Filesystem Write Bytes By 
workload (KiB)', 'Cumulative count of bytes written / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_fs_writes_bytes_total{xm_cont_name!="POD"}[1m]) + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Writes:{{humanize $value}}KiB|{threshold}KiB.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_fs_writes_by_workload', 'Container Filesystem Write Bytes By workload (KiB)', 'Cumulative count of bytes written / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_fs_writes_bytes_total{xm_cont_name!="POD"}[1m]) + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Writes:{{humanize $value}}KiB|{threshold}KiB.', now(), now()) + WHERE public.metric_meta2.id = 'container_fs_writes_by_workload'; + + +INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + VALUES ('container_fs_usage_by_workload', 'Container Filesystem Usage By workload (%)', 'Container File System Usage: 100 * (Used Bytes / Limit Bytes) (not contain persistent volume)', 'sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) ((container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, 
xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)/ (((container_fs_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100) > 0) or (container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1000)', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Filesystem Usage:{{humanize $value}}%|{threshold}%.', now(), now()) + ON CONFLICT (id) + DO + UPDATE SET (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) + = ('container_fs_usage_by_workload', 'Container Filesystem Usage By workload (%)', 'Container File System Usage: 100 * (Used Bytes / Limit Bytes) (not contain persistent volume)', 'sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) ((container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)/ (((container_fs_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) * 100) > 0) or (container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1000)', 'Filesystem', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} 
PD:{{$labels.xm_pod_id}} Filesystem Usage:{{humanize $value}}%|{threshold}%.', now(), now()) + WHERE public.metric_meta2.id = 'container_fs_usage_by_workload';
+
+
+-- Workload-level metric definitions (memory, network, pod/container state).
+--
+-- Every statement below is an upsert keyed on metric_meta2.id. The row is
+-- written exactly once, in VALUES; when the id already exists the stored row
+-- is overwritten from EXCLUDED (the row that was proposed for insertion), so
+-- the insert and update branches cannot drift apart. The id column itself is
+-- not reassigned: the conflict target already guarantees it is equal.
+--
+-- Inside the expr literals, " + on (...) group_left(owner_name) ... * 0" is
+-- PromQL, not SQL: it is part of the stored query text and must stay as is.
+-- NOTE(review): {filter} and {threshold} look like placeholders substituted by
+-- the consuming application - confirm against the reader of this table.
+-- NOTE(review): created_date is still overwritten on conflict, exactly as
+-- before; remove it from the SET list if the original creation time should
+-- survive a re-run of this patch.
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_max_usage_bytes_by_workload', 'Container Memory Max Used By workload (GiB)', 'Maximum memory usage recorded in bytes / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_memory_max_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Max Memory Usage:{{humanize $value}}GiB|{threshold}GiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_usage_bytes_by_workload', 'Container Memory Used By workload (GiB)', 'Current memory usage in GiB, this includes all memory regardless of when it was accessed', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_memory_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024 / 1024 / 1024', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Used Memory:{{humanize $value}}GiB|{threshold}GiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+-- Percentage of the memory limit; the "or" branch covers containers whose
+-- limit is zero by comparing usage against 1 GiB instead (see description).
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_usage_by_workload', 'Container Memory Usage By workload (%)', 'Container Memory usage compared to limit if limit is non-zero or 1GiB if limit is zero', 'sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / (((container_spec_memory_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)) > 0) * 100) or sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024 / 1024 / 1024 *100)', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Memory Usage:{{humanize $value}}%|{threshold}%.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_swap_by_workload', 'Container Memory Swap By workload (GiB)', 'Container swap usage in bytes / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_memory_swap{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Swap Memory:{{humanize $value}}GiB|{threshold}GiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_working_set_bytes_by_workload', 'Container Memory Working Set By workload (GiB)', 'Current working set in GiB, this includes recently accessed memory, dirty memory, and kernel memory', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_memory_working_set_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024 / 1024 / 1024', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Working Set Memory:{{humanize $value}}GiB|{threshold}GiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_memory_cache_by_workload', 'Container Memory Cache By workload (GiB)', 'Number of bytes of page cache memory / 1073741824', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (container_memory_cache{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1073741824', 'Memory', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Cache Memory:{{humanize $value}}GiB|{threshold}GiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_network_receive_by_workload', 'Container Network Receive By workload (KiB)', 'Network device statistic receive_bytes / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_network_receive_bytes_total{} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Network', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Network Receive Usage:{{humanize $value}}KiB|{threshold}KiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('container_network_transmit_by_workload', 'Container Network Transmit By workload (KiB)', 'Network device statistic transmit_bytes / 1024', 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name) (rate(container_network_transmit_bytes_total{} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024', 'Network', 'Workload', NULL, TRUE, TRUE, 'CLST:{{$labels.xm_clst_id}} DP:{{$labels.owner_name}} CT:{{$labels.xm_cont_name}} PD:{{$labels.xm_pod_id}} Network Transmit Usage:{{humanize $value}}KiB|{threshold}KiB.', now(), now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+-- State metrics: these two rows are stored with anomaly_score = false,
+-- unlike the resource metrics above.
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('count_pod_not_running_by_workload','Number of Pods not running By Workload','Number of Pods not running (pod_state)','count by (xm_clst_id, xm_pod_id,xm_cont_id, xm_cont_name, entity_type, xm_namespace, pod_state) (imxc_kubernetes_container_resource_limit_cpu{pod_state!="Running", {filter}})','State','Workload',null,true,false,'CLST:{{$labels.xm_clst_id}} POD:{{$labels.xm_pod_id}} State:{{$labels.pod_state}}.',now(),now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('count_container_not_running_by_workload','Number of Containers not running By Workload','Number of Containers not running (container_state)','count by (xm_clst_id, xm_pod_id, xm_cont_id, xm_cont_name, entity_type, xm_namespace, container_state) (imxc_kubernetes_container_resource_limit_cpu{container_state!="Running", {filter}})','State','Workload',null,true,false,'CLST:{{$labels.xm_clst_id}} CONT:{{$labels.xm_cont_name}} State:{{$labels.container_state}}.',now(),now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+-- Container restart alert: fires when the restart counter grew by more than 1
+-- over the last 10 minutes. Upsert keyed on id; the stored row is refreshed
+-- from EXCLUDED so the values are written only once.
+-- NOTE(review): the id is spelled 'cotainer_...' (missing "n"). It is kept
+-- as is because it is the row key; rename only together with every reader.
+INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date)
+    VALUES ('cotainer_restart_count_by_workload','Number of Containers Restart','Number of Containers Restart (10m)','increase(imxc_kubernetes_container_restart_count{{filter}}[10m])>1','State','Workload',null,true,false,'CLST:{{$labels.xm_clst_id}} CONT:{{$labels.xm_cont_name}} RESTARTCOUNT FOR 10MINUTE:{{humanize $value}}.',now(),now())
+    ON CONFLICT (id) DO UPDATE
+        SET (meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
+          = (EXCLUDED.meta_name, EXCLUDED.description, EXCLUDED.expr, EXCLUDED.resource_type, EXCLUDED.entity_type, EXCLUDED.groupby_keys, EXCLUDED.in_use, EXCLUDED.anomaly_score, EXCLUDED.message, EXCLUDED.created_date, EXCLUDED.modified_date);
+
+
+-- Install manifest for the node-exporter agent (row id 4): a headless Service
+-- plus a DaemonSet, both exposing port 9110. The manifest literal appears once
+-- and the conflict branch copies it from EXCLUDED instead of repeating it.
+-- The literal is carried over unchanged from the previous revision of this
+-- statement. $CLOUDMOA_NAMESPACE and $DOCKER_REGISTRY_URL are left in the
+-- text as written - presumably substituted at install time; confirm.
+INSERT INTO public.agent_install_file_info (id, name, type, description, yaml, use_yn, created_date, modified_date, version)
+VALUES (4, 'node-exporter', 'agent', 'Node에 관련된 Metric 시계열 데이터를 수집하여 고객사 클러스터에 설치된 Prometheus에 전달하는 역할을 합니다.', '--- + apiVersion: v1 + kind: Service + metadata: + annotations: + prometheus.io/scrape: ''true'' + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE + spec: + clusterIP: None + ports: + - name: scrape + port: 9110 + protocol: TCP + selector: + app: cloudmoa-node-exporter + type: ClusterIP + --- + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: cloudmoa-node-exporter + namespace: $CLOUDMOA_NAMESPACE + spec: + selector: + matchLabels: + app: cloudmoa-node-exporter + template: + metadata: + labels: + app: cloudmoa-node-exporter + name: cloudmoa-node-exporter + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - image: $DOCKER_REGISTRY_URL/prom/node-exporter + name: cloudmoa-node-exporter + ports: + - containerPort: 9110 + hostPort: 9110 + name: scrape + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.tcpstat + - --web.listen-address=:9110 + # --log.level=debug + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + hostNetwork: true + hostPID: true + securityContext: + runAsNonRoot: true + runAsUser: 65534 + volumes: + - hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root + ', true, '2021-03-11 13:41:02.000000', '2021-03-11 13:41:06.000000', null)
+ON CONFLICT (id) DO UPDATE
+    SET (name, type, description, yaml, use_yn, created_date, modified_date, version)
+      = (EXCLUDED.name, EXCLUDED.type, EXCLUDED.description, EXCLUDED.yaml, EXCLUDED.use_yn, EXCLUDED.created_date, EXCLUDED.modified_date, EXCLUDED.version);
+
+
+INSERT INTO public.agent_install_file_info (id, name, type, description, yaml, use_yn, created_date, modified_date, version) +VALUES (3, 'prometheus', 'agent', 'Prometheus는 다양한 Exporter들과 연결될 수 있으며, 기본적으로 Node Exporter와 cAdvisor를 통해 수집한 Metric 데이터를 Kafka를 통해 수집 클러스터에 전달하는 역할을 합니다.', '--- + # VERSION : 20190227142300 + + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-prometheus-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + prometheus.yml: | +
global: + scrape_interval: 15s + # Attach these labels to any time series or alerts when communicating with + # external systems. + external_labels: + monitor: ''5s-monitor'' + + #kafka writer only + no_local_disk_write: true + + # A scrape configuration for running Prometheus on a Kubernetes cluster. + # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + # + + # + # rule_files: + # - "scaling.rules" + + # i suppose my code in the remote kafka write is something wrong ... should append a double quote character at the end of the url + remote_write: + - url: kafka://$COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT/remote_prom?encoding=proto3&compression=snappy + + scrape_configs: + + # Scrape config for nodes (kubelet). + # + # Rather than connecting directly to the node, the scrape is proxied though the + # Kubernetes apiserver. This means it will work if Prometheus is running out of + # cluster, or can''t connect to nodes for some other reason (e.g. because of + # firewalling). + - job_name: ''kubernetes-kubelet'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # . 
+ tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + # copied from https://github.com/kayrus/prometheus-kubernetes/blob/master/prometheus-configmap.yaml + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + 
target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. 
+ scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + --- + apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + spec: + ports: + - port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: cloudmoa-prometheus + type: ClusterIP + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-prometheus + spec: + selector: + matchLabels: + app: cloudmoa-prometheus + replicas: 1 + strategy: + type: 
Recreate + template: + metadata: + labels: + app: cloudmoa-prometheus + spec: + containers: + - name: cloudmoa-prometheus + image: $DOCKER_REGISTRY_URL/imxc/metric-agent:$IMAGE_TAG + ports: + - containerPort: 9090 + args: + - --config.file=/etc/prometheus/prometheus.yml + #- --log.level=debug + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: "/prometheus" + name: data + - mountPath: /etc/prometheus/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + restartPolicy: Always + volumes: + - emptyDir: {} + name: data + - name: config-volume + configMap: + name: cloudmoa-prometheus-configuration + ', true, '2021-03-11 13:39:07.000000', '2021-03-11 13:39:09.000000', '1.15') +ON CONFLICT (id) +DO + UPDATE SET (id, name, type, description, yaml, use_yn, created_date, modified_date, version) + = (3, 'prometheus', 'agent', 'Prometheus는 다양한 Exporter들과 연결될 수 있으며, 기본적으로 Node Exporter와 cAdvisor를 통해 수집한 Metric 데이터를 Kafka를 통해 수집 클러스터에 전달하는 역할을 합니다.', '--- + # VERSION : 20190227142300 + + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-prometheus-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + prometheus.yml: | + global: + scrape_interval: 15s + # Attach these labels to any time series or alerts when communicating with + # external systems. + external_labels: + monitor: ''5s-monitor'' + + #kafka writer only + no_local_disk_write: true + + # A scrape configuration for running Prometheus on a Kubernetes cluster. 
+ # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + # + + # + # rule_files: + # - "scaling.rules" + + # i suppose my code in the remote kafka write is something wrong ... should append a double quote character at the end of the url + remote_write: + - url: kafka://$COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT/remote_prom?encoding=proto3&compression=snappy + + scrape_configs: + + # Scrape config for nodes (kubelet). + # + # Rather than connecting directly to the node, the scrape is proxied though the + # Kubernetes apiserver. This means it will work if Prometheus is running out of + # cluster, or can''t connect to nodes for some other reason (e.g. because of + # firewalling). + - job_name: ''kubernetes-kubelet'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # . 
+ tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + # copied from https://github.com/kayrus/prometheus-kubernetes/blob/master/prometheus-configmap.yaml + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + 
target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + + - job_name: ''kubernetes-cadvisor'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. 
+ scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod_name] + target_label: xm_pod_id + - source_labels: [container_name] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container_name] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + --- + apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + spec: + ports: + - port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: cloudmoa-prometheus + type: ClusterIP + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-prometheus + spec: + selector: + matchLabels: + app: cloudmoa-prometheus + replicas: 1 + strategy: + type: 
Recreate + template: + metadata: + labels: + app: cloudmoa-prometheus + spec: + containers: + - name: cloudmoa-prometheus + image: $DOCKER_REGISTRY_URL/imxc/metric-agent:$IMAGE_TAG + ports: + - containerPort: 9090 + args: + - --config.file=/etc/prometheus/prometheus.yml + #- --log.level=debug + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: "/prometheus" + name: data + - mountPath: /etc/prometheus/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + restartPolicy: Always + volumes: + - emptyDir: {} + name: data + - name: config-volume + configMap: + name: cloudmoa-prometheus-configuration + ', true, '2021-03-11 13:39:07.000000', '2021-03-11 13:39:09.000000', '1.15') + WHERE public.agent_install_file_info.id = 3; + + +INSERT INTO public.agent_install_file_info (id, name, type, description, yaml, use_yn, created_date, modified_date, version) +VALUES (2, 'agent', 'agent', '관제 대상 클러스터의 Topology 데이터를 수집하여 Kafka를 통해 수집 클러스터에 전달하는 역할을 하며, 그 밖에 API 서버와의 TCP 연결을 통해 관리 기능, Log Viewer 기능 등을 수행합니다.', '--- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: cloudmoa-cluster-role + rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + 
verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE + subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE + roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io + --- + apiVersion: policy/v1beta1 + kind: PodSecurityPolicy + metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE + spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim + --- + 
apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: cloudmoa-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-agent + spec: + selector: + matchLabels: + app: cloudmoa-agent + template: + metadata: + labels: + app: cloudmoa-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-agent + image: $DOCKER_REGISTRY_URL/imxc/imxc-agent:$IMAGE_TAG + imagePullPolicy: Always + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: KAFKA_SERVER + value: $COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER + value: $COLLTION_SERVER_API_IP:$COLLECTION_SERVER_API_NETTY_PORT + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: LOG_LEVEL + value: "DEBUG" + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home', true, '2021-03-11 13:37:48.000000', '2021-03-11 13:37:51.000000', null) +ON CONFLICT (id) /* a row with this id already exists: overwrite every listed column with the tuple below */ +DO + UPDATE SET (id, name, type, description, yaml, use_yn, created_date, modified_date, version) + = (2, 'agent', 'agent', '관제 대상 클러스터의 Topology 데이터를 수집하여 Kafka를 통해 수집 클러스터에 전달하는 역할을 하며, 그 밖에 API 서버와의 TCP 연결을 통해 관리 기능, Log Viewer 기능 등을 수행합니다.', '--- + 
apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: cloudmoa-cluster-role + rules: + - nonResourceURLs: + - "*" + verbs: + - get + - apiGroups: + - metrics.k8s.io + resources: + - pods + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - nodes/stats + - endpoints + - namespaces + - events + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - replicasets + - replicasets/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch + - update + - apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - update + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get + - list + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - imxc-ps + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kube-apiserver-client-kubelet + resources: + - signers + verbs: + - approve + - apiGroups: + - certificates.k8s.io + resourceNames: + - kubernetes.io/kubelet-serving + resources: + - signers + verbs: + - approve + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - proxy + - apiGroups: + - "" + resources: + - nodes/log + - nodes/metrics + - nodes/proxy + - nodes/spec + - nodes/stats + verbs: + - ''*'' + - apiGroups: + - ''*'' + resources: + - ''*'' + verbs: + - get + - list + - watch + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: 
ClusterRoleBinding + metadata: + name: cloudmoa-restricted-rb + namespace: $CLOUDMOA_NAMESPACE + subjects: + - kind: ServiceAccount + name: default + namespace: $CLOUDMOA_NAMESPACE + roleRef: + kind: ClusterRole + name: cloudmoa-cluster-role + apiGroup: rbac.authorization.k8s.io + --- + apiVersion: policy/v1beta1 + kind: PodSecurityPolicy + metadata: + name: cloudmoa-psp + namespace: $CLOUDMOA_NAMESPACE + spec: + privileged: true + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + hostPorts: + - max: 65535 + min: 0 + hostNetwork: true + hostPID: true + volumes: + - configMap + - secret + - emptyDir + - hostPath + - projected + - downwardAPI + - persistentVolumeClaim + --- + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: cloudmoa-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-agent + spec: + selector: + matchLabels: + app: cloudmoa-agent + template: + metadata: + labels: + app: cloudmoa-agent + spec: + hostNetwork: true + hostPID: true + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + containers: + - name: cloudmoa-agent + image: $DOCKER_REGISTRY_URL/imxc/imxc-agent:$IMAGE_TAG + imagePullPolicy: Always + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 600Mi + securityContext: + privileged: true + volumeMounts: + - mountPath: /host/usr/bin + name: bin-volume + - mountPath: /var/run/docker.sock + name: docker-volume + - mountPath: /host/proc + name: proc-volume + - mountPath: /root + name: root-volume + - mountPath: /log + name: log-volume + env: + - name: KAFKA_SERVER + value: $COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER + value: $COLLTION_SERVER_API_IP:$COLLECTION_SERVER_API_NETTY_PORT + - name: ROOT_DIRECTORY + value: /root + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + 
fieldPath: spec.nodeName + - name: LOG_LEVEL + value: "DEBUG" + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + volumes: + - name: bin-volume + hostPath: + path: /usr/bin + type: Directory + - name: docker-volume + hostPath: + path: /var/run/docker.sock + - name: proc-volume + hostPath: + path: /proc + - name: root-volume + hostPath: + path: / + - name: log-volume + hostPath: + path: /home', true, '2021-03-11 13:37:48.000000', '2021-03-11 13:37:51.000000', null) + WHERE public.agent_install_file_info.id = 2; + + +INSERT INTO public.agent_install_file_info (id, name, type, description, yaml, use_yn, created_date, modified_date, version) +VALUES (6, 'prometheus', 'agent', 'Prometheus는 다양한 Exporter들과 연결될 수 있으며, 기본적으로 Node Exporter와 cAdvisor를 통해 수집한 Metric 데이터를 Kafka를 통해 수집 클러스터에 전달하는 역할을 합니다.', '--- + # VERSION : 20190227142300 + + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-prometheus-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + prometheus.yml: | + global: + scrape_interval: 15s + # Attach these labels to any time series or alerts when communicating with + # external systems. + external_labels: + monitor: ''5s-monitor'' + + #kafka writer only + no_local_disk_write: true + + # A scrape configuration for running Prometheus on a Kubernetes cluster. + # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + # + + # + # rule_files: + # - "scaling.rules" + + # i suppose my code in the remote kafka write is something wrong ... 
should append a double quote character at the end of the url + remote_write: + - url: kafka://$COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT/remote_prom?encoding=proto3&compression=snappy + + scrape_configs: + + # Scrape config for nodes (kubelet). + # + # Rather than connecting directly to the node, the scrape is proxied though the + # Kubernetes apiserver. This means it will work if Prometheus is running out of + # cluster, or can''t connect to nodes for some other reason (e.g. because of + # firewalling). + - job_name: ''kubernetes-kubelet'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # . 
+ tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + # copied from https://github.com/kayrus/prometheus-kubernetes/blob/master/prometheus-configmap.yaml + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + 
target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. 
+ scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + --- + apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + spec: + ports: + - port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: cloudmoa-prometheus + type: ClusterIP + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-prometheus + spec: + selector: + matchLabels: + app: cloudmoa-prometheus + replicas: 1 + strategy: + type: Recreate + template: + 
metadata: + labels: + app: cloudmoa-prometheus + spec: + containers: + - name: cloudmoa-prometheus + image: $DOCKER_REGISTRY_URL/imxc/metric-agent:$IMAGE_TAG + ports: + - containerPort: 9090 + args: + - --config.file=/etc/prometheus/prometheus.yml + #- --log.level=debug + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: "/prometheus" + name: data + - mountPath: /etc/prometheus/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + restartPolicy: Always + volumes: + - emptyDir: {} + name: data + - name: config-volume + configMap: + name: cloudmoa-prometheus-configuration + ', false, '2021-03-11 13:39:07.000000', '2021-03-11 13:39:09.000000', '1.16') +ON CONFLICT (id) +DO + UPDATE SET (id, name, type, description, yaml, use_yn, created_date, modified_date, version) + = (6, 'prometheus', 'agent', 'Prometheus는 다양한 Exporter들과 연결될 수 있으며, 기본적으로 Node Exporter와 cAdvisor를 통해 수집한 Metric 데이터를 Kafka를 통해 수집 클러스터에 전달하는 역할을 합니다.', '--- + # VERSION : 20190227142300 + + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-prometheus-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + scaling.rules: | + groups: + - name: scaleup + rules : + - alert : ScaleUpRule + expr: job:webapp_config_open_sessions_current_count:sum > 15 + annotations: + summary: "Scale up when current sessions is greater than 15" + description: "Firing when total sessions active greater than 15" + prometheus.yml: | + global: + scrape_interval: 15s + # Attach these labels to any time series or alerts when communicating with + # external systems. + external_labels: + monitor: ''5s-monitor'' + + #kafka writer only + no_local_disk_write: true + + # A scrape configuration for running Prometheus on a Kubernetes cluster. 
+ # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + # + + # + # rule_files: + # - "scaling.rules" + + # i suppose my code in the remote kafka write is something wrong ... should append a double quote character at the end of the url + remote_write: + - url: kafka://$COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT/remote_prom?encoding=proto3&compression=snappy + + scrape_configs: + + # Scrape config for nodes (kubelet). + # + # Rather than connecting directly to the node, the scrape is proxied though the + # Kubernetes apiserver. This means it will work if Prometheus is running out of + # cluster, or can''t connect to nodes for some other reason (e.g. because of + # firewalling). + - job_name: ''kubernetes-kubelet'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # . 
+ tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - target_label: xm_entity_type + replacement: ''Node'' + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (kubelet_running_pod_count) + action: keep + + # copied from https://github.com/kayrus/prometheus-kubernetes/blob/master/prometheus-configmap.yaml + - job_name: ''kubernetes-node-exporter'' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__meta_kubernetes_role] + action: replace + target_label: kubernetes_role + - source_labels: [__address__] + regex: ''(.*):10250'' + replacement: ''${1}:9110'' + target_label: __address__ + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ + # set "name" value to "job" + - source_labels: [job] + regex: ''kubernetes-(.*)'' + replacement: ''${1}'' + target_label: name + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Node'' + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + 
target_label: xm_namespace + replacement: $1 + action: replace + metric_relabel_configs: + - source_labels: [ __name__ ] + regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total) + action: keep + + - job_name: ''kubernetes-cadvisor'' + + # Default to scraping over https. If required, just disable this or change to + # `http`. 
+ scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + metrics_path: /metrics/cadvisor + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: xm_clst_id + replacement: ''$CLOUDMOA_CLUSTER_ID'' + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: xm_node_id + - target_label: xm_entity_type + replacement: ''Container'' + + metric_relabel_configs: + - source_labels: [namespace] + target_label: xm_namespace + - source_labels: [pod] + target_label: xm_pod_id + - source_labels: [container] + target_label: xm_cont_name + - source_labels: [id] + target_label: xm_cont_id + - source_labels: [container] + regex: (.+) + action: keep + - source_labels: [ __name__ ] + regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes) + action: keep + --- + apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + spec: + ports: + - port: 9090 + protocol: TCP + targetPort: 9090 + selector: + app: cloudmoa-prometheus + type: ClusterIP + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-prometheus + namespace: $CLOUDMOA_NAMESPACE + labels: + app: cloudmoa-prometheus + spec: + selector: + matchLabels: + app: cloudmoa-prometheus + replicas: 1 + strategy: + type: Recreate + template: + 
metadata: + labels: + app: cloudmoa-prometheus + spec: + containers: + - name: cloudmoa-prometheus + image: $DOCKER_REGISTRY_URL/imxc/metric-agent:$IMAGE_TAG + ports: + - containerPort: 9090 + args: + - --config.file=/etc/prometheus/prometheus.yml + #- --log.level=debug + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 300m + memory: 1000Mi + volumeMounts: + - mountPath: "/prometheus" + name: data + - mountPath: /etc/prometheus/ + name: config-volume + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + restartPolicy: Always + volumes: + - emptyDir: {} + name: data + - name: config-volume + configMap: + name: cloudmoa-prometheus-configuration + ', false, '2021-03-11 13:39:07.000000', '2021-03-11 13:39:09.000000', '1.16') + WHERE public.agent_install_file_info.id = 6; + + +INSERT INTO public.agent_install_file_info (id, name, type, description, yaml, use_yn, created_date, modified_date, version) +VALUES (7, 'jaeger', 'application', 'CloudMOA에서는 고객사에서 운영 중인 application의 TPS, 서비스 연관관계 등의 데이터를 얻기 위해서 Jaeger를 사용하며, Jaeger 사용을 위해 Jaeger-client, jaeger-agent, jaeger-collector의 설치가 필요합니다. 
+ ', '--- + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-jaeger-collector-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + strategies.json: | + { + "default_strategy": { + "type": "probabilistic", + "param": 0.1 + } + } + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-jaeger-collector + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + jaeger-infra: collector-deployment + spec: + selector: + matchLabels: + app: jaeger + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: jaeger + jaeger-infra: collector-pod + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/jaeger/jaeger-collector:$IMAGE_TAG + name: jaeger-collector + args: + - --sampling.strategies-file=/etc/jaeger-collector/strategies.json + - --sampling.strategies-reload-interval=60s + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 14267 + protocol: TCP + - containerPort: 14268 + protocol: TCP + - containerPort: 9411 + protocol: TCP + - containerPort: 14250 + protocol: TCP + - containerPort: 14269 + protocol: TCP + readinessProbe: + httpGet: + path: "/" + port: 14269 + env: + - name: COLLECTOR_ZIPKIN_HTTP_PORT + value: "9411" + - name: SPAN_STORAGE_TYPE + value: kafka + - name: KAFKA_PRODUCER_BROKERS + value: $COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: LOG_MAXBACKUPS + value: "3" + - name: LOG_MAXSIZE + value: "100" + - name: LOG_STDOUT + value: "TRUE" + - name: LOG_FILENAME + value: "jaeger-collector" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + volumeMounts: + - mountPath: /etc/jaeger-collector + name: config-volume + + volumes: + - name: config-volume + configMap: + name: cloudmoa-jaeger-collector-configuration + --- + 
apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-jaeger-collector + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + jaeger-infra: collector-service + spec: + ports: + - name: jaeger-collector-tchannel + port: 14267 + protocol: TCP + targetPort: 14267 + - name: jaeger-collector-metrics + port: 14269 + targetPort: 14269 + - name: jaeger-collector-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - name: jaeger-collector-zipkin + port: 9411 + targetPort: 9411 + selector: + jaeger-infra: collector-pod + type: ClusterIP + --- + apiVersion: v1 + kind: List + items: + - apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-jaeger-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + spec: + selector: + matchLabels: + app: jaeger + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5778" + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/jaegertracing/jaeger-agent:$IMAGE_TAG + name: jaeger-agent + args: ["--reporter.grpc.host-port", "cloudmoa-jaeger-collector:14250"] + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: LOG_MAXBACKUPS + value: "3" + - name: LOG_MAXSIZE + value: "100" + - name: LOG_STDOUT + value: "TRUE" + - name: LOG_FILENAME + value: "jaeger-agent" + + - apiVersion: v1 + kind: Service + metadata: + name: jaeger-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + app.kubernetes.io/name: jaeger + 
app.kubernetes.io/component: agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + type: ClusterIP', true, '2021-03-11 17:48:34.000000', '2021-03-11 17:48:39.000000', null) +ON CONFLICT (id) +DO + UPDATE SET (id, name, type, description, yaml, use_yn, created_date, modified_date, version) + = (7, 'jaeger', 'application', 'CloudMOA에서는 고객사에서 운영 중인 application의 TPS, 서비스 연관관계 등의 데이터를 얻기 위해서 Jaeger를 사용하며, Jaeger 사용을 위해 Jaeger-client, jaeger-agent, jaeger-collector의 설치가 필요합니다. + ', '--- + apiVersion: v1 + kind: ConfigMap + metadata: + name: cloudmoa-jaeger-collector-configuration + namespace: $CLOUDMOA_NAMESPACE + data: + strategies.json: | + { + "default_strategy": { + "type": "probabilistic", + "param": 0.1 + } + } + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-jaeger-collector + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + jaeger-infra: collector-deployment + spec: + selector: + matchLabels: + app: jaeger + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: jaeger + jaeger-infra: collector-pod + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - image: $DOCKER_REGISTRY_URL/jaeger/jaeger-collector:$IMAGE_TAG + name: jaeger-collector + args: + - --sampling.strategies-file=/etc/jaeger-collector/strategies.json + - --sampling.strategies-reload-interval=60s + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 14267 + protocol: TCP + - containerPort: 14268 + protocol: TCP + - containerPort: 9411 + protocol: TCP + - containerPort: 14250 + protocol: TCP + - 
containerPort: 14269 + protocol: TCP + readinessProbe: + httpGet: + path: "/" + port: 14269 + env: + - name: COLLECTOR_ZIPKIN_HTTP_PORT + value: "9411" + - name: SPAN_STORAGE_TYPE + value: kafka + - name: KAFKA_PRODUCER_BROKERS + value: $COLLTION_SERVER_KAFKA_IP:$COLLTION_SERVER_KAFKA_INTERFACE_PORT + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: LOG_MAXBACKUPS + value: "3" + - name: LOG_MAXSIZE + value: "100" + - name: LOG_STDOUT + value: "TRUE" + - name: LOG_FILENAME + value: "jaeger-collector" + - name: CLUSTER_ID + value: $CLOUDMOA_CLUSTER_ID + - name: API_SERVER_LICENSE + value: $COLLTION_SERVER_API_IP:8080 + + volumeMounts: + - mountPath: /etc/jaeger-collector + name: config-volume + + volumes: + - name: config-volume + configMap: + name: cloudmoa-jaeger-collector-configuration + --- + apiVersion: v1 + kind: Service + metadata: + name: cloudmoa-jaeger-collector + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + jaeger-infra: collector-service + spec: + ports: + - name: jaeger-collector-tchannel + port: 14267 + protocol: TCP + targetPort: 14267 + - name: jaeger-collector-metrics + port: 14269 + targetPort: 14269 + - name: jaeger-collector-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - name: jaeger-collector-zipkin + port: 9411 + targetPort: 9411 + selector: + jaeger-infra: collector-pod + type: ClusterIP + --- + apiVersion: v1 + kind: List + items: + - apiVersion: apps/v1 + kind: Deployment + metadata: + name: cloudmoa-jaeger-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + spec: + selector: + matchLabels: + app: jaeger + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5778" + spec: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + 
containers: + - image: $DOCKER_REGISTRY_URL/jaegertracing/jaeger-agent:$IMAGE_TAG + name: jaeger-agent + args: ["--reporter.grpc.host-port", "cloudmoa-jaeger-collector:14250"] + resources: + requests: + cpu: 100m + memory: 50Mi + limits: + cpu: 200m + memory: 100Mi + ports: + - containerPort: 5775 + protocol: UDP + - containerPort: 6831 + protocol: UDP + - containerPort: 6832 + protocol: UDP + - containerPort: 5778 + protocol: TCP + env: + - name: LOG_LEVEL + value: "INFO" + - name: LOG_MAXAGE + value: "1" + - name: LOG_MAXBACKUPS + value: "3" + - name: LOG_MAXSIZE + value: "100" + - name: LOG_STDOUT + value: "TRUE" + - name: LOG_FILENAME + value: "jaeger-agent" + + - apiVersion: v1 + kind: Service + metadata: + name: jaeger-agent + namespace: $CLOUDMOA_NAMESPACE + labels: + app: jaeger + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + spec: + ports: + - name: agent-zipkin-thrift + port: 5775 + protocol: UDP + targetPort: 5775 + - name: agent-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: agent-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: agent-configs + port: 5778 + protocol: TCP + targetPort: 5778 + selector: + app.kubernetes.io/name: jaeger + app.kubernetes.io/component: agent + type: ClusterIP', true, '2021-03-11 17:48:34.000000', '2021-03-11 17:48:39.000000', null) + WHERE public.agent_install_file_info.id = 7; + +--Menu Resource +--Infrastructure +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (0, 'Infrastructure', '01.Infrastructure', 0, NULL, (SELECT id FROM auth_resource3 WHERE name='menu|Infrastructure'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (1, 'Topology', NULL, 0, 'topologyInfra', (SELECT id 
FROM auth_resource3 WHERE name='menu|Infrastructure|Topology'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Topology'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (2, 'Overview', NULL, 1, 'overViewInfra', (SELECT id FROM auth_resource3 WHERE name='menu|Infrastructure|Overview'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Overview'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (3, 'Resource Usage', NULL, 2, 'resourceUsageInfra', (SELECT id FROM auth_resource3 WHERE name='menu|Infrastructure|Resource Usage'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Resource Usage'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (4, 'Namespace', NULL, 3, 'namespaceInfra', (SELECT id FROM auth_resource3 WHERE name='menu|Infrastructure|Namespace'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Namespace'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (5, 'Nodes', NULL, 4, 'nodesInfra', (select id from auth_resource3 where name='menu|Infrastructure|Nodes'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Nodes'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES 
(6, 'Node Details', NULL, 5, 'nodeDetailInfra', (select id from auth_resource3 where name='menu|Infrastructure|Node Details'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Infrastructure|Node Details'); + +--Workloads +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (10, 'Workloads', '02.Workload', 1, NULL, (select id from auth_resource3 where name='menu|Workloads'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (11, 'Overview', NULL, 0, 'overviewWorkloads', (select id from auth_resource3 where name='menu|Workloads|Overview'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Overview'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (12, 'deployList', NULL, 1, 'deployListWorkloads', (select id from auth_resource3 where name='menu|Workloads|Deploy List'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Deploy List'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (17, 'Jobs', NULL, 6, 'jobsWorkloads', (select id from auth_resource3 where name='menu|Workloads|Jobs'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Jobs'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, 
scope_level) +VALUES (18, 'Cron Jobs', NULL, 7, 'cronJobsWorkloads', (select id from auth_resource3 where name='menu|Workloads|Cron Jobs'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Cron Jobs'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (19, 'Pods', NULL, 8, 'podsWorkloads', (select id from auth_resource3 where name='menu|Workloads|Pods'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Pods'); + +--Services +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (20, 'Services', '03.Service', 2, NULL, (select id from auth_resource3 where name='menu|Services'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (21, 'DataCenter Service', NULL, 0, 'topologyServices', (select id from auth_resource3 where name='menu|Services|Topology'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|Topology'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (22, 'ServiceOverview', NULL, 1, 'overviewServices', (select id from auth_resource3 where name='menu|Services|Overview'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|Overview'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, 
scope_level) +VALUES (23, 'Cluster Service', NULL, 2, 'detailServices', (select id from auth_resource3 where name='menu|Services|Structure'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|Structure'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (24, 'List', NULL, 3, 'serviceList', (select id from auth_resource3 where name='menu|Services|List'), 3) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 3 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|List'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (25, 'Detail', NULL, 4, 'slasServices', (select id from auth_resource3 where name='menu|Services|Detail'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|Detail'); + +--Statistics & Analysis +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (40, 'Statistics & Analysis', '06.Statistics&Analysis', 5, NULL, (select id from auth_resource3 where name='menu|Statistics & Analysis'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (41, 'Performance Trends', NULL, 0, 'performanceTrendSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Performance Trends'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Performance Trends'); + 
+INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (42, 'Alert Analysis', NULL, 2, 'alertAnalysisSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Alert Analysis'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Alert Analysis'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (43, 'Alert History', NULL, 3, 'alertHistorySA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Alert History'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Alert History'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (44, 'Anomaly Score Analysis', NULL, 4, 'anomalyScoreSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Anomaly Score'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Anomaly Score'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (45, 'Job History', NULL, 5, 'jobHistorySA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Job History'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Job History'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (46, 'Sparse Log Analysis', NULL, 6, 'sparseLogSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Sparse 
Logs'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Sparse Logs'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (47, 'Log Viewer', NULL, 7, 'logViewerSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Log Viewer'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Log Viewer'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (48, 'eventLog Analysis', NULL, 8, 'eventLogSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Event Logs'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Event Logs'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (49, 'Container Life Cycle', NULL, 9, 'containerLifecycleSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Container Life Cycle'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Container Life Cycle'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (50, 'Service Trace Analysis', NULL, 10, 'serviceTraceSA', (select id from auth_resource3 where name='menu|Statistics & Analysis|Service Traces'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Statistics & Analysis|Service Traces'); + +--Reports +INSERT INTO 
public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (60, 'Reports', '07.Report', 6, NULL, (select id from auth_resource3 where name='menu|Reports'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Reports'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (61, 'Documents', NULL, 0, 'documentReport', (select id from auth_resource3 where name='menu|Reports|Documents'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Reports|Documents'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (62, 'Templates', NULL, 1, 'reportSettings', (select id from auth_resource3 where name='menu|Reports|Templates'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Reports|Templates'); + +--Dashboards +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (65, 'Dashboards', '10.Dashboard', 7, NULL, (select id from auth_resource3 where name='menu|Dashboards'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Dashboards'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (66, 'Documents', NULL, 0, 'documentDashboard', (select id from auth_resource3 where name='menu|Dashboards|Documents'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Dashboards|Documents'); + +INSERT INTO public.menu_meta (id, 
description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (67, 'Templates', NULL, 1, 'templateDashboard', (select id from auth_resource3 where name='menu|Dashboards|Templates'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Dashboards|Templates'); + +--Hosts +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (80, 'Hosts', '12.Hosts', 1, NULL, (select id from auth_resource3 where name='menu|Hosts'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (81, 'Topology', null, 0, 'topologyHost', (select id from auth_resource3 where name='menu|Hosts|Topology'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts|Topology'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (82, 'Overview', NULL, 1, 'overviewHost', (select id from auth_resource3 where name='menu|Hosts|Overview'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts|Overview'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (83, 'List', NULL, 2, 'listHost', (select id from auth_resource3 where name='menu|Hosts|List'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts|List'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) 
+VALUES (84, 'Detail', NULL, 3, 'detailHost', (select id from auth_resource3 where name='menu|Hosts|Detail'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts|Detail'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (85, 'Group', NULL, 4, 'groupHost', (select id from auth_resource3 where name='menu|Hosts|Group'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Hosts|Group'); + +--Settings +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (90, 'Settings', '08.Setting', 10, NULL, (select id from auth_resource3 where name='menu|Settings'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (91, 'User', NULL, 0, 'userGroupSettings', (select id from auth_resource3 where name='menu|Settings|User & Group'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|User & Group'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (92, 'Alerts', NULL, 1, 'alertSettings', (select id from auth_resource3 where name='menu|Settings|Alerts'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Alerts'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (93, 'Host Alerts', NULL, 2, 'hostAlertSettings', 
(select id from auth_resource3 where name='menu|Settings|Host Alerts'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Host Alerts'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (94, 'Sparse Logs', NULL, 3, 'sparseLogSettings', (select id from auth_resource3 where name='menu|Settings|Sparse Logs'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Sparse Logs'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (96, 'Metric Meta', NULL, 5, 'metricMetaSettings', (select id from auth_resource3 where name='menu|Settings|Metric Meta'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Metric Meta'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (97, 'Appearance', NULL, 6, 'appearanceSettings', (select id from auth_resource3 where name='menu|Settings|General'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|General'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (98, 'Notification', NULL, 7, 'notificationsSettings', (select id from auth_resource3 where name='menu|Settings|Notification'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Notification'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (99, 
'Agent', NULL, 8, 'agentSettings', (select id from auth_resource3 where name='menu|Settings|Agent'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Agent'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (100, 'Alias', NULL, 9, 'aliasSettings', (select id from auth_resource3 where name='menu|Settings|Alias'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Alias'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (101, 'License', NULL, 10, 'validationLicense', (select id from auth_resource3 where name='menu|Settings|License'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|License'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (102, 'agent Installation', NULL, 11, 'agentInstallationSettings', (select id from auth_resource3 where name='menu|Settings|Agent Installation'), 2) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 2 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Settings|Agent Installation'); + +--Health Check +INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level) +VALUES (121, 'Health Check', '09.HealthCheck', 9, 'healthCHeck', (select id from auth_resource3 where name='menu|Health Check'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Health Check'); + +INSERT INTO public.menu_meta (id, description, icon, "position", url, 
auth_resource3_id, scope_level) +VALUES (122, 'Check Script', NULL, 0, 'checkScript', (select id from auth_resource3 where name='menu|Health Check|Check Script'), 0) +ON CONFLICT (id) + DO + UPDATE SET scope_level = 0 + WHERE public.menu_meta.auth_resource3_id = (SELECT id FROM auth_resource3 WHERE name = 'menu|Health Check|Check Script'); + +INSERT INTO public.license_policy +(policy_id, policy_desc, term_year, term_month, term_day, license_type, allowable_range, storage_capacity, cluster_count, node_count, pod_count, service_count, core_count, host_ids, user_division, created_date, modified_date) +VALUES('promotion_license', '프로모션 기간에 사용자들에게 발급되는 라이선스', 0, 0, 14, 'trial', '0', 'unlimited', '1', '10', 'unlimited', 'unlimited', 'unlimited', 'unlimited', '1', now(), null); \ No newline at end of file diff --git a/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210730.psql b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210730.psql new file mode 100644 index 0000000..60ad862 --- /dev/null +++ b/roles/cmoa_pgpatch/files/pg-patch/postgres_patch_R30020210730.psql @@ -0,0 +1,4 @@ +alter table cloud_user alter column log_in_count set default 0; +alter table cloud_user alter column user_lock set default false; + +UPDATE public.metric_meta2 SET meta_name = 'Number of Containers Restart', description = 'Number of Containers Restart (10m)', expr = 'increase(imxc_kubernetes_container_restart_count{{filter}}[10m])', resource_type = 'State', entity_type = 'Workload', groupby_keys = null, in_use = true, anomaly_score = false, message = 'CLST:{{$labels.xm_clst_id}} CONT:{{$labels.xm_cont_name}} RESTARTCOUNT FOR 10MINUTE:{{humanize $value}}.', created_date = '2021-06-23 09:30:38.646312', modified_date = '2021-06-23 09:30:38.646312' WHERE id = 'cotainer_restart_count_by_workload'; \ No newline at end of file diff --git a/roles/cmoa_pgpatch/tasks/main.yml b/roles/cmoa_pgpatch/tasks/main.yml new file mode 100644 index 0000000..c353562 --- /dev/null +++ 
b/roles/cmoa_pgpatch/tasks/main.yml
@@ -0,0 +1,37 @@
+---
+# List the pods in the CloudMOA namespace; the (currently disabled) patch steps
+# further down refer to this result as pod_list.
+- name: 1. Get a list of all pods from the namespace
+  command: kubectl -n "{{ cmoa_namespace }}" get pods --no-headers -o custom-columns=":metadata.name"
+  # The debug task below reads pod_list, so the result has to be registered.
+  register: pod_list
+  # Listing pods is read-only, so never report this task as changed.
+  changed_when: false
+
+- name: Show the pod names that were found
+  debug:
+    msg: "{{ pod_list.stdout_lines }}"
+
+# NOTE(review): the two tasks below are disabled. Before re-enabling them:
+#   * pod_list.stdout is the whole pod listing (one name per line), not the
+#     name of a single PostgreSQL pod - pick the target pod explicitly.
+#   * pg_version is a list, so inside with_items the file name presumably
+#     needs item rather than pg_version - TODO confirm.
+#   * "PGPASSWORD=... && psql" assigns a shell variable without exporting it,
+#     so psql would not receive it; also move the password out of this file.
+#- name: 2. Copy psql file in postgres (DDL)
+#  kubernetes.core.k8s_cp:
+#    namespace: "{{ cmoa_namespace }}"
+#    pod: "{{ pod_list.stdout }}"
+#    remote_path: /tmp/postgres_patch_{{ pg_version }}.psql
+#    local_path: "{{ role_path }}/files/pg-patch/postgres_patch_{{ pg_version }}.psql"
+#  with_items: "{{ pg_version }}"
+#  when: kubernetes_role == 'master'
+#
+#- name: 3. Execute a command in postgres (DDL)
+#  kubernetes.core.k8s_exec:
+#    namespace: "{{ cmoa_namespace }}"
+#    pod: "{{ pod_list.stdout }}"
+#    command: bash -c "PGPASSWORD='eorbahrhkswp' && /usr/bin/psql -h 'localhost' -U 'admin' -d 'postgresdb' -f /tmp/postgres_patch_{{ pg_version }}.psql"
+#  with_items: "{{ pg_version }}"
+#  when: kubernetes_role == 'master'