add pg patch role

This commit is contained in:
ByeonJungHun
2023-10-04 14:51:02 +09:00
parent bc3c19859e
commit e878cd7a68
22 changed files with 8396 additions and 7 deletions

View File

@@ -8,5 +8,6 @@
REGISTRY: 10.10.31.243:5000/cmoa3 REGISTRY: 10.10.31.243:5000/cmoa3
REPO: "nexus" # dockerhub or nexus REPO: "nexus" # dockerhub or nexus
roles: roles:
- role: cmoa_install - role: agent_os_setting
delegate_to: 127.0.0.1 # - role: cmoa_install
# delegate_to: 127.0.0.1

15
cmoa_pgpatch.yaml Executable file
View File

@@ -0,0 +1,15 @@
---
# Play: run the cmoa_pgpatch role for the hosts in the "cluster" inventory group.
- hosts: cluster
  become: true
  gather_facts: true
  # kubeconfig exported to every task in this play.
  environment:
    KUBECONFIG: /root/.kube/ansible_config
  vars:
    cmoa_namespace: imxc
    # Quoted so each entry is unambiguously a string, never a number.
    # Presumably the list of patch versions the role processes — confirm
    # against the role's tasks.
    pg_version:
      - "3.5.2"
      - "3.5.3"
      - "3.5.4"
  roles:
    # delegate_to makes the role's tasks execute on 127.0.0.1.
    - role: cmoa_pgpatch
      delegate_to: 127.0.0.1

View File

@@ -1,16 +1,12 @@
[master] [master]
10.10.43.206 10.10.43.210
[worker1] [worker1]
10.10.43.207
[worker2] [worker2]
10.10.43.208
[cluster:children] [cluster:children]
master master
worker1
worker2
[master:vars] [master:vars]

Binary file not shown.

View File

@@ -0,0 +1,7 @@
---
# Variable values: target namespace and the list of version strings.
# NOTE(review): the cmoa_pgpatch.yaml play sets the same two variable names
# with a shorter list (no 3.5.1) — confirm which one is meant to win.
cmoa_namespace: imxc
pg_version:
  - "3.5.1"
  - "3.5.2"
  - "3.5.3"
  - "3.5.4"

View File

@@ -0,0 +1,803 @@
-- Replace the expression stored for metric 'container_memory_usage_by_workload'.
-- First branch: container_memory_usage_bytes divided by container_spec_memory_limit_bytes
-- (only where the limit is > 0), times 100.
-- Second branch (after "or"): usage divided by 1024 three times, times 100.
-- Each operand adds "imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0" via
-- group_left(owner_name); the added value is zero, so presumably the join is only there to
-- bring in the owner_name label and the {{filter}} selector — confirm.
UPDATE public.metric_meta2 SET expr='sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / (((container_spec_memory_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0)) > 0) * 100) or sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) imxc_kubernetes_container_resource_limit_cpu{{filter}} * 0) / 1024 / 1024 / 1024 *100)' WHERE id = 'container_memory_usage_by_workload';
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: List
items:
- apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-trace-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-trace-agent
spec:
selector:
matchLabels:
app: cloudmoa-trace-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-trace-agent
spec:
securityContext:
runAsNonRoot: true
runAsUser: 65534
containers:
- image: $DOCKER_REGISTRY_URL/trace-agent:$IMAGE_TAG
name: cloudmoa-trace-agent
resources:
requests:
cpu: 100m
memory: 50Mi
limits:
cpu: 200m
memory: 100Mi
ports:
- containerPort: 5775
protocol: UDP
- containerPort: 6831
protocol: UDP
- containerPort: 6832
protocol: UDP
- containerPort: 5778
protocol: TCP
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- apiVersion: v1
kind: Service
metadata:
name: cloudmoa-trace-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-trace-agent
spec:
ports:
- name: agent-zipkin-thrift
port: 5775
protocol: UDP
targetPort: 5775
- name: agent-compact
port: 6831
protocol: UDP
targetPort: 6831
- name: agent-binary
port: 6832
protocol: UDP
targetPort: 6832
- name: agent-configs
port: 5778
protocol: TCP
targetPort: 5778
selector:
app: cloudmoa-trace-agent
type: ClusterIP' WHERE id = 7;
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: ''true''
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
clusterIP: None
ports:
- name: scrape
port: 9110
protocol: TCP
selector:
app: cloudmoa-node-exporter
type: ClusterIP
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
selector:
matchLabels:
app: cloudmoa-node-exporter
template:
metadata:
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
spec:
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- image: $DOCKER_REGISTRY_URL/node-exporter
name: cloudmoa-node-exporter
ports:
- containerPort: 9110
hostPort: 9110
name: scrape
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.tcpstat
- --web.listen-address=:9110
# --log.level=debug
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: false
- mountPath: /host/sys
name: sys
readOnly: false
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
hostNetwork: true
hostPID: true
securityContext:
runAsNonRoot: true
runAsUser: 65534
volumes:
- hostPath:
path: /proc
name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
' WHERE id = 4;
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cloudmoa-cluster-role
rules:
- nonResourceURLs:
- "*"
verbs:
- get
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- services
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- nodes/stats
- endpoints
- namespaces
- events
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- deployments/scale
- replicasets
- replicasets/scale
- statefulsets
- statefulsets/scale
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- get
- list
- watch
- update
- apiGroups:
- batch
resources:
- cronjobs
verbs:
- get
- list
- update
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- get
- list
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- policy
resources:
- podsecuritypolicies
verbs:
- use
resourceNames:
- imxc-ps
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kube-apiserver-client-kubelet
resources:
- signers
verbs:
- approve
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kubelet-serving
resources:
- signers
verbs:
- approve
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- proxy
- apiGroups:
- ""
resources:
- nodes/log
- nodes/metrics
- nodes/proxy
- nodes/spec
- nodes/stats
verbs:
- ''*''
- apiGroups:
- ''*''
resources:
- ''*''
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloudmoa-restricted-rb
namespace: $CLOUDMOA_NAMESPACE
subjects:
- kind: ServiceAccount
name: default
namespace: $CLOUDMOA_NAMESPACE
roleRef:
kind: ClusterRole
name: cloudmoa-cluster-role
apiGroup: rbac.authorization.k8s.io
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: cloudmoa-psp
namespace: $CLOUDMOA_NAMESPACE
spec:
privileged: true
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
runAsUser:
rule: RunAsAny
fsGroup:
rule: RunAsAny
hostPorts:
- max: 65535
min: 0
hostNetwork: true
hostPID: true
volumes:
- configMap
- secret
- emptyDir
- hostPath
- projected
- downwardAPI
- persistentVolumeClaim
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-topology-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-topology-agent
spec:
selector:
matchLabels:
app: cloudmoa-topology-agent
template:
metadata:
labels:
app: cloudmoa-topology-agent
spec:
hostNetwork: true
hostPID: true
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- name: cloudmoa-topology-agent
image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG
imagePullPolicy: Always
resources:
requests:
cpu: 200m
memory: 512Mi
limits:
cpu: 500m
memory: 600Mi
securityContext:
privileged: true
volumeMounts:
- mountPath: /host/usr/bin
name: bin-volume
- mountPath: /var/run/docker.sock
name: docker-volume
- mountPath: /host/proc
name: proc-volume
- mountPath: /root
name: root-volume
- mountPath: /log
name: log-volume
env:
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: ROOT_DIRECTORY
value: /root
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: LOG_LEVEL
value: "INFO"
volumes:
- name: bin-volume
hostPath:
path: /usr/bin
type: Directory
- name: docker-volume
hostPath:
path: /var/run/docker.sock
- name: proc-volume
hostPath:
path: /proc
- name: root-volume
hostPath:
path: /
- name: log-volume
hostPath:
path: /home' WHERE id = 2;
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod]
target_label: xm_pod_id
- source_labels: [container]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: LOG_MAXAGE
value: "1"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
' WHERE id = 6;
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod_name]
target_label: xm_pod_id
- source_labels: [container_name]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container_name]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
' WHERE id = 3;

View File

@@ -0,0 +1,919 @@
-- from diff: databases created by this patch.
-- The unquoted name CONFIGS is folded to lower case (configs) by PostgreSQL.
-- CREATE DATABASE cannot run inside a transaction block, so these statements
-- must be executed outside of BEGIN/COMMIT.
CREATE DATABASE CONFIGS;
CREATE DATABASE keycloak;
-- cortex alert
-- Three tables holding alert configuration, followed by their primary keys.
-- Every CREATE TABLE is schema-qualified with public. so that it always matches
-- the ALTER TABLE public.* statements below, whatever search_path is in effect.

-- Alert rule template fragments, keyed by config_id.
CREATE TABLE public.alert_rule_config_info (
    config_id     varchar   NOT NULL,
    config_data   text      NOT NULL,
    in_use        boolean   DEFAULT true NOT NULL,
    created_date  timestamp,
    modified_date timestamp
);

-- Alert config template fragments plus a default value for each, keyed by config_id.
CREATE TABLE public.alert_config_info (
    config_id      varchar   NOT NULL,
    config_data    text      NOT NULL,
    config_default text      NOT NULL,
    in_use         boolean   DEFAULT true NOT NULL,
    created_date   timestamp,
    modified_date  timestamp
);

-- Alert routing / receiver settings; one row per id.
CREATE TABLE public.alert_config (
    id              bigint    NOT NULL,
    cluster_id      varchar,
    resolve_timeout varchar,
    receiver        varchar,
    group_by        varchar,
    group_wait      varchar,
    group_interval  varchar,
    repeat_interval varchar,
    routes_level    varchar,
    routes_continue varchar,
    receiver_name   varchar,
    webhook_url     varchar,
    send_resolved   varchar,
    inner_route     boolean,
    inner_webhook   boolean,
    in_use          boolean   DEFAULT true NOT NULL,
    created_date    timestamp,
    modified_date   timestamp
);

ALTER TABLE public.alert_rule_config_info ADD CONSTRAINT alert_rule_config_info_config_id_pk PRIMARY KEY (config_id);
ALTER TABLE public.alert_config_info ADD CONSTRAINT alert_config_info_config_id_pk PRIMARY KEY (config_id);
ALTER TABLE public.alert_config ADD CONSTRAINT alert_config_id_pk PRIMARY KEY (id);
-- New nullable columns on tenant_info.
ALTER TABLE tenant_info ADD COLUMN delete_scheduler_date timestamp;
ALTER TABLE tenant_info ADD COLUMN tenant_init_clusters varchar(255);

-- New columns on cloud_user; status is mandatory and defaults to 'use'.
ALTER TABLE cloud_user ADD COLUMN dormancy_date timestamp;
ALTER TABLE cloud_user ADD COLUMN status varchar(255) DEFAULT 'use'::character varying NOT NULL;
-- DELETE
-- FROM public.auth_resource3
-- WHERE name = 'menu|Health Check|Check Script';
-- DELETE
-- FROM public.auth_resource3
-- WHERE name = 'menu|Health Check';
-- Register the auth resource for the "Active Transaction" menu.
INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id)
VALUES ('menu|Services|Active Transaction', false, null);

-- Move the existing menu entries with id 80 and 90 to positions 10 and 99.
UPDATE public.menu_meta SET position = 10::integer WHERE id = 80::bigint;
UPDATE public.menu_meta SET position = 99::integer WHERE id = 90::bigint;

-- Add the menu entry itself, linked to the resource inserted above by name.
INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level)
VALUES (
    26,
    'Active Transaction',
    NULL,
    5,
    'overviewServiceJSPD',
    (SELECT id FROM auth_resource3 WHERE name = 'menu|Services|Active Transaction'),
    2
);
-- Seed rows for alert_config_info. Each row is keyed by config_id and stores a template
-- fragment with ${...} placeholders (config_data) plus a default value (config_default)
-- in which some placeholders are already replaced by literals.
-- NOTE(review): these are plain '...' literals, so with PostgreSQL's default
-- standard_conforming_strings = on every \n is stored as a backslash followed by "n",
-- not as a newline; presumably the consumer expands it — confirm.
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('config', now(), null, 'global:${GLOBAL}\nroute:${ROUTE}\nreceivers:${RECEIVERS}', 'global:${GLOBAL}\nroute:${ROUTE}\nreceivers:${RECEIVERS}', true);
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('global', now(), null, '\n resolve_timeout: ${RESOLVE_TIMEOUT}', '\n resolve_timeout: 5m', true);
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('receivers', now(), null, '\n- name: ''${NAME}''\n webhook_configs:${WEBHOOK_CONFIGS}', '\n- name: ''cdms''\n webhook_configs:${WEBHOOK_CONFIGS}', true);
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('route', now(), null, '\n receiver: ''${RECEIVER}''\n group_by: [${GROUP_BY}]\n group_wait: ${GROUP_WAIT}\n group_interval: ${GROUP_INTERVAL}\n repeat_interval: ${REPEAT_INTERVAL}\n routes:${ROUTES}', '\n receiver: ''cdms''\n group_by: [xm_clst_id, level]\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 10m\n routes:${ROUTES}', true);
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('webhook_configs', now(), null, '\n - url: ''${WEBHOOK_URL}''\n send_resolved: ${SEND_RESOLVED}', '\n - url: ''${WEBHOOK_URL}''\n send_resolved: false', true);
insert into public.alert_config_info (config_id, created_date, modified_date, config_data, config_default, in_use) values ('routes', now(), null, '\n - receiver: ''${ROUTES_RECEIVER}''\n group_by: [${ROUTES_GROUP_BY}]\n group_wait: ${ROUTES_GROUP_WAIT}\n group_interval: ${ROUTES_GROUP_INTERVAL}\n repeat_interval: ${ROUTES_REPEAT_INTERVAL}\n match_re:\n level: ${LEVEL}\n continue: ${CONTINUE}', '\n - receiver: ''cdms''\n group_by: [xm_clst_id, level]\n group_wait: 5s\n group_interval: 5s\n repeat_interval: 1m\n match_re:\n level: Critical\n continue: true', true);
-- Seed rows for alert_rule_config_info: template fragments for 'config', 'groups',
-- 'isHost' and 'rules', keyed by config_id. The {{ $labels.* }} / {{ $value }} tokens
-- are stored verbatim alongside the ${...} placeholders.
insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('config', now(), null, 'groups:${GROUPS}', true);
insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('groups', now(), null, '\n- name: "${NAME}"\n rules:${RULES}', true);
insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('isHost', now(), null, '\n instance: "{{ $labels.instance }}"\n is_host: "true"', true);
insert into public.alert_rule_config_info (config_id, created_date, modified_date, config_data, in_use) values ('rules', now(), null, '\n - alert: "${ALERT}"\n expr: "${EXPR}"\n labels:\n level: "${LEVEL}"\n for: "${FOR}"\n annotations:\n xm_service_name: "{{ $labels.xm_service_name }}"\n level: "${LEVEL}"\n meta_id: "${META_ID}"\n xm_node_id: "{{ $labels.xm_node_id }}"\n threshold: ${THRESHOLD}\n xm_container_id: "{{ $labels.xm_cont_name }}"\n message: "${MESSAGE}"\n rule_id: ${RULE_ID}\n xm_pod_id: "{{ $labels.xm_pod_id }}"\n xm_clst_id: "{{ $labels.xm_clst_id }}"\n xm_namespace: "{{ $labels.xm_namespace }}"\n value: "{{ $value }}"\n xm_entity_type: "{{ $labels.xm_entity_type }}"', true);
-- JSPD option value table: one row per option code, with its default value
-- and the metadata columns describing how the option is typed and entered.
CREATE TABLE public.jspd_prop (
    code_id       varchar(255) NOT NULL,
    default_value varchar(255) NOT NULL,
    description   text,
    code_type     varchar(255),
    input_type    varchar(255),
    input_props   varchar(255),
    use_yn        boolean      NOT NULL,
    created_date  timestamp    NOT NULL,
    modified_date timestamp    NOT NULL,
    CONSTRAINT jspd_prop_pkey PRIMARY KEY (code_id)
);
-- JSPD option value settings list table: the value chosen for an option code
-- per (cluster_id, namespace, service).
-- code_id is part of the primary key, which makes it NOT NULL, and it must
-- reference an existing row in public.jspd_prop.
CREATE TABLE public.jspd_config (
    cluster_id    varchar(255) NOT NULL,
    namespace     varchar(255) NOT NULL,
    service       varchar(255) NOT NULL,
    code_id       varchar(255),
    code_value    varchar(255),
    code_type     varchar(255),
    created_date  timestamp    NOT NULL,
    modified_date timestamp    NOT NULL,
    CONSTRAINT jspd_config_pkey PRIMARY KEY (cluster_id, namespace, service, code_id),
    CONSTRAINT jspd_config_code_id_fk FOREIGN KEY (code_id) REFERENCES public.jspd_prop(code_id)
);
-- ALTER TABLE public.jspd_prop
-- ADD input_type character varying(255);
-- ALTER TABLE public.jspd_prop
-- ADD input_props character varying(255);
-- Seed rows for public.jspd_prop: one row per JSPD option code.
-- Changes against the previous revision of this block:
--   * the table is schema-qualified (public.), like every other statement in
--     this script, so the inserts no longer depend on search_path;
--   * the target columns are listed explicitly, so the inserts no longer depend
--     on the physical column order of the table;
--   * the SQL_SEND_PERIOD description said "Txnspl"; it now says "Txnsql",
--     matching the SQL_SEND_LIMIT description.
-- Statements stay separate (one row each) so that a failure on one row does not
-- affect the others.
-- NOTE(review): the descriptions of INCLUDE_EXCEPTION and EXCLUDE_EXCEPTION look
-- swapped relative to the option names; left unchanged, confirm with the agent
-- documentation.
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRX_NAME_TYPE', '0', 'Set the transaction name generation method (0:default, 1:parameter, 2:param_nouri, 3:attribute)', 'integer', 'select', '{"default":"0", "parameter":"1", "param_nouri":"2", "attribute":"3"}', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRX_NAME_KEY', '', 'Set the transaction name generation method by TRX_NAME_TYPE (parameter(1), param_nouri(2),attribute(3))', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('CURR_TRACE_TXN', '*:3000', 'Option to check TXNNAME with startsWith logic and collect calltree based on elapsetime. blank or set to *:0 when collecting all.', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('CURR_TRACE_LEVEL', '100', 'call tree detection level', 'integer', 'range', '{"gte":"0", "lte":"100"}', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRACE_JDBC', 'true', 'include call tree data', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('EXCLUDE_SERVICE', 'gif,js,css,xml', 'exclude service name', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('INCLUDE_EXCEPTION', '', 'Exception that you do not want to be treated as an exception transaction is set.(type.Exception)', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('EXCLUDE_EXCEPTION', '', 'Set the exception to be treated as an exception transaction.(type.Exception)', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RESP_HEADER_TID', 'false', 'include X-Xm-Tid text for gearing imxwsmj', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('USE_RUNTIME_REDEFINE', 'false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('USE_RUNTIME_REDEFINE_HTTP_REMOTE', 'false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RT_RMI', 'false', 'rt.jar (socket, file, throwable) function use yn option', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RT_RMI_TYPE', '3', 'remote key value(1: pkey, 2: ckey, 3: pckey)', 'integer', 'select', '{"pkey":"1", "ckey":"2", "pckey":"3"}', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RT_RMI_ELAPSE_TIME', '0', 'Collect transactions that are greater than or equal to the option value', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RT_FILE', '0x10', 'Display file input/output in call tree', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('RT_SOCKET', '0x10', 'Display socket input/output in call tree', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_LIMIT', '100000', 'Limit the number of calltree', 'integer', 'range', '{"gte":"0"}', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('LIMIT_SQL', '20', 'Collection limits based on SQL sentence length', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_COUNT_LIMIT', '3000', 'Transactions per second', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('USE_SQL_ELLIPSIS', 'false', 'Collect length of sql string by half of SQL_TEXT_BUFFER_SIZE', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_SQL_LIMIT_COUNT', '2000', 'SQL collection limit', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_CPU_TIME', 'false', 'cpu time metric used in transactions option', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_MEMORY', 'false', 'memory alloc size metric used in transactions option', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('ENABLE_WEB_ID_WHEN_NO_USERAGENT', 'false', 'Do not create an web ID unless requested by the browser', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('USE_SQL_SEQ', 'false', 'Add sequence number to sql and packet', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRACE_FETCH_METHOD', 'false', 'Display the fetch function of ResultSet in the call tree', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('EXCLUDE_THREAD', '', 'Ability to block monitoring of a specific thread name, value = String[] (prefix1,prefix2)', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('USE_METHOD_SEQ', 'false', 'Display the calltree in the form of a time series without summary', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRACE_METHOD_MEMORY', 'false', 'Collects allocation memory for each method of calltree. (unit k)', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TRACE_METHOD_CPUTIME', 'false', 'Collects cputime for each method of calltree. (unit ms)', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('DISABLE_ROOT_METHOD', 'false', 'Express the service root method at the top of the call tree', 'boolean', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_BUFFER_SIZE', '2500', 'size of the internal buffer that stores the call tree method data.', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_STACK_BUFFER_SIZE', '100', 'A separate option to additionally collect methods that did not generate an error among methods that were not collected because the MTD_BUFFER_SIZE option value was exceeded.', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_EXCEPTION_BUFFER_SIZE', '100', 'A separate option to additionally collect methods that have an error among methods that could not be collected because the MTD_BUFFER_SIZE option value was exceeded.', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('DEBUG', '0x000000000', 'Option to specify log level (Debugging)', 'string', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('EXCEPTION_LIMIT', '-1', 'Exception content length limit', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_SEND_PERIOD', '1000', 'Txninfo transmission cycle (ms)', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_SEND_PERIOD', '1000', 'Txnmethod transmission cycle (ms)', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('SQL_SEND_PERIOD', '1000', 'Txnsql transmission cycle (ms)', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('ETOE_SEND_PERIOD', '1000', 'E2einfo transmission cycle (ms)', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('TXN_SEND_LIMIT', '15000', 'Txninfo maximum number of transfers', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('MTD_SEND_LIMIT', '15000', 'Txnmethod maximum number of transfers', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('SQL_SEND_LIMIT', '15000', 'Txnsql maximum number of transfers', 'integer', 'input', '', true, now(), now());
INSERT INTO public.jspd_prop (code_id, default_value, description, code_type, input_type, input_props, use_yn, created_date, modified_date)
VALUES ('ETOE_SEND_LIMIT', '15000', 'E2einfo maximum number of transfers', 'integer', 'input', '', true, now(), now());
-- public.metric_meta2: replace the stored expressions of the node / host /
-- cluster memory metrics and of the host filesystem metric.
-- The id patterns use '#' as the LIKE escape character, so every '_' is matched
-- literally and each pattern selects exactly one id.

UPDATE public.metric_meta2
   SET expr = '((node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - (node_memory_MemFree_bytes{xm_entity_type="Node", {filter}} + node_memory_Cached_bytes{xm_entity_type="Node", {filter}} + node_memory_Buffers_bytes{xm_entity_type="Node", {filter}} + node_memory_SReclaimable_bytes{xm_entity_type="Node", {filter}})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - node_memory_MemFree_bytes{xm_entity_type="Node", {filter}}) / 1024 / 1024 / 1024'::text
 WHERE id LIKE 'node#_memory#_used' ESCAPE '#';

UPDATE public.metric_meta2
   SET expr = '((node_memory_MemTotal_bytes{{filter}} - (node_memory_MemFree_bytes{{filter}} + node_memory_Cached_bytes{{filter}} + node_memory_Buffers_bytes{{filter}} + node_memory_SReclaimable_bytes{{filter}})) >= 0 or (node_memory_MemTotal_bytes{{filter}} - node_memory_MemFree_bytes{{filter}})) / node_memory_MemTotal_bytes{{filter}} * 100'::text
 WHERE id LIKE 'host#_memory#_usage' ESCAPE '#';

-- The expression below spans several lines; the line breaks are part of the
-- stored value.
UPDATE public.metric_meta2
   SET expr = 'sum by(instance, mountpoint, fstype, data_type) (
label_replace(node_filesystem_size_bytes {fstype!="rootfs",{filter}}, "data_type", "totalsize", "", "") or
label_replace(node_filesystem_avail_bytes {fstype!="rootfs",{filter}}, "data_type", "availablesize", "", ""))'::text
 WHERE id LIKE 'host#_fs#_total#_by#_mountpoint' ESCAPE '#';

UPDATE public.metric_meta2
   SET expr = '(1- avg by (xm_clst_id) (((node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Cached_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Buffers_bytes{xm_entity_type=''Node'', {filter}}) <= node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} or node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}}) / node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}})) * 100'::text
 WHERE id LIKE 'cluster#_memory#_usage' ESCAPE '#';

UPDATE public.metric_meta2
   SET expr = '((node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} - (node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Cached_bytes{xm_entity_type=''Node'', {filter}} + node_memory_Buffers_bytes{xm_entity_type=''Node'', {filter}} + node_memory_SReclaimable_bytes{xm_entity_type=''Node'', {filter}})) >= 0 or (node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} - node_memory_MemFree_bytes{xm_entity_type=''Node'', {filter}})) / node_memory_MemTotal_bytes{xm_entity_type=''Node'', {filter}} * 100'::text
 WHERE id LIKE 'node#_memory#_usage' ESCAPE '#';

UPDATE public.metric_meta2
   SET expr = '(node_memory_MemTotal_bytes{{filter}} - (node_memory_MemFree_bytes{{filter}} + node_memory_Cached_bytes{{filter}} + node_memory_Buffers_bytes{{filter}} + node_memory_SReclaimable_bytes{{filter}})) >= 0 or (node_memory_MemTotal_bytes{{filter}} - node_memory_MemFree_bytes{{filter}})'::text
 WHERE id LIKE 'host#_memory#_used' ESCAPE '#';
-- New JSPD transaction metrics in public.metric_meta2: error rate, elapsed
-- time, error count and throughput, per service and per service pod.
-- in_use / anomaly_score are supplied as the literals 't' / 'f'.
-- NOTE(review): two expressions read a metric named imxc_txn_laytency; the
-- spelling looks like a typo of "latency" but may be the name actually
-- exported, so it is kept as is. Confirm before renaming.

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_pod_txn_error_rate',
    'Service Pod Transaction Error Rate',
    'The number of transaction error rate for pod',
    'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) / sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count {{filter}} [1m]))',
    'Request',
    'Service',
    NULL,
    't',
    'f',
    'SVC:{{$labels.xm_service_name}} Svc Pod Transaction Error rate:{{humanize $value}}|{threshold}.',
    '2022-02-15 18:08:58.18',
    '2022-02-15 18:08:58.18'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_txn_error_rate',
    'Service Transaction Error Rate',
    'Service Transaction Error Rate',
    'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) / sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count {{filter}} [1m]))',
    'Request',
    'Service',
    NULL,
    't',
    'f',
    'SVC:{{$labels.xm_service_name}} Error Request Rate:{{humanize $value}}%|{threshold}%.',
    '2022-02-15 14:33:00.118',
    '2022-02-15 15:40:17.64'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_txn_elapsed_time_avg',
    'Service Transaction Elapsed Time (avg)',
    'Service Average Elapsed Time',
    'sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_total_count{{filter}}[1m])))== 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_laytency{{filter}}[1m])))/ sum by(xm_clst_id, xm_namespace, xm_service_name) ((increase(imxc_txn_total_count{{filter}}[1m])))',
    'Request',
    'Service',
    NULL,
    't',
    't',
    'SVC:{{$labels.xm_service_name}} Transaction Requests Time Avg:{{humanize $value}}ms|{threshold}ms.',
    '2021-11-15 16:09:34.233',
    '2021-11-15 16:12:21.335'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_pod_txn_elapsed_time_avg',
    'Service Pod Transaction Elapsed Time (avg)',
    'The number of transaction counts per second for pod',
    'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_total_count{{filter}}[1m]))==0 or sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_laytency{{filter}}[1m])) / sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (increase(imxc_txn_total_count{{filter}}[1m]))',
    'Request',
    'Service',
    NULL,
    't',
    'f',
    'SVC:{{$labels.xm_service_name}} Pod Transaction Requests Time Avg:{{humanize $value}}ms|{threshold}ms.',
    '2022-02-15 18:04:55.228',
    '2022-02-15 18:04:55.228'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_txn_error_count',
    'Service Transaction Error Count',
    'Service Transaction Error Count',
    'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count{{filter}}[1m])) == 0 or sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_error_count {{filter}} [1m])) ',
    'Request',
    'Service',
    NULL,
    't',
    't',
    'SVC:{{$labels.xm_service_name}} Error Request count:{{humanize $value}}%|{threshold}%.',
    '2021-11-15 16:10:31.352',
    '2021-11-15 16:12:21.335'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_txn_per_sec',
    'Service Transaction Count (per Second)',
    'Service Transaction Count (per Second)',
    'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m]))',
    'Request',
    'Service',
    NULL,
    't',
    't',
    'SVC:{{$labels.xm_service_name}} Svc Transaction count/Seconds:{{humanize $value}}|{threshold}.',
    '2021-11-15 16:11:19.606',
    '2021-11-15 16:12:21.335'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES (
    'imxc_jspd_pod_txn_per_sec',
    'Service Pod Transaction Count (per sec)',
    'The number of transaction counts per second for pod',
    'sum by(xm_clst_id, xm_namespace, xm_pod_id, xm_service_name) (rate(imxc_txn_total_count{{filter}}[1m]))',
    'Request',
    'Service',
    NULL,
    't',
    'f',
    'SVC:{{$labels.xm_service_name}} Svc Pod Transaction count/Seconds:{{humanize $value}}|{threshold}.',
    '2022-02-15 17:59:39.45',
    '2022-02-15 17:59:39.45'
);
-- Auto-generated SQL script #202202221030
-- Rewrites the expr of the container "*_by_workload" metrics. Every expression
-- adds (sum(imxc_kubernetes_container_resource_limit_cpu{container_state="Running",
-- pod_state="Running", ...}) without (instance)) * 0 through group_left(owner_name),
-- which contributes 0 to the value while attaching the owner_name label.
-- NOTE(review): container_cpu_usage_by_workload receives exactly the same
-- expression as container_cpu_usage_core_by_workload (no "* 100"), whereas the
-- system/user non-core variants are multiplied by 100. Confirm this is intended.
-- NOTE(review): in container_fs_usage_by_workload the "* 100" multiplies the
-- divisor (the limit), unlike container_memory_usage_by_workload where it
-- multiplies the quotient. Confirm this is intended.

-- CPU
UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) * 100'
 WHERE id = 'container_cpu_system_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_system_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)'
 WHERE id = 'container_cpu_system_core_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)'
 WHERE id = 'container_cpu_usage_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_usage_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)'
 WHERE id = 'container_cpu_usage_core_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) * 100'
 WHERE id = 'container_cpu_user_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_cpu_user_seconds_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0)'
 WHERE id = 'container_cpu_user_core_by_workload';

-- Filesystem
UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_fs_limit_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running", {filter}}) without (instance)) * 0) / 1073741824'
 WHERE id = 'container_fs_limit_bytes_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_fs_reads_bytes_total{xm_cont_name!="POD"} [1m])+ on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024'
 WHERE id = 'container_fs_reads_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_fs_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824'
 WHERE id = 'container_fs_usage_bytes_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0)/ (((container_fs_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) * 100) > 0) or (container_fs_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1000)'
 WHERE id = 'container_fs_usage_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (rate(container_fs_writes_bytes_total{xm_cont_name!="POD"}[1m]) + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024'
 WHERE id = 'container_fs_writes_by_workload';

-- Memory
UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_cache{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824'
 WHERE id = 'container_memory_cache_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_max_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824'
 WHERE id = 'container_memory_max_usage_bytes_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_swap{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1073741824'
 WHERE id = 'container_memory_swap_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_usage_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024'
 WHERE id = 'container_memory_usage_bytes_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / (((container_spec_memory_limit_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0)) > 0) * 100) or sum by (xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) ((container_memory_usage_bytes{xm_entity_type="Container", xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024 *100)'
 WHERE id = 'container_memory_usage_by_workload';

UPDATE public.metric_meta2
   SET expr = 'sum by(xm_clst_id, xm_namespace, owner_name, xm_pod_id, xm_cont_name, xm_entity_type) (container_memory_working_set_bytes{xm_cont_name!="POD"} + on (xm_clst_id, xm_namespace, xm_pod_id, xm_cont_name) group_left(owner_name) (sum (imxc_kubernetes_container_resource_limit_cpu{container_state="Running", pod_state="Running" ,{filter}}) without (instance)) * 0) / 1024 / 1024 / 1024'
 WHERE id = 'container_memory_working_set_bytes_by_workload';
-- Upsert the two "active transaction" metrics: insert the row when the id is
-- new, otherwise overwrite only expr with the value supplied here.
-- The previous form ended with "WHERE id = '<same id>'". Inside
-- ON CONFLICT ... DO UPDATE both the existing row and EXCLUDED are in scope, so
-- an unqualified "id" is rejected as an ambiguous column reference. The
-- predicate was redundant anyway: the conflicting row is by definition the one
-- whose id equals the inserted id. It is dropped, and the new expression is
-- taken from EXCLUDED so it is written only once.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES
    ('imxc_jspd_active_txn_per_sec', 'Service Active Transaction Count (per Second)', 'Service Active Transaction Count (per Second)', 'sum by(xm_clst_id, xm_namespace, xm_service_name) (rate(imxc_txn_active_count {{filter}}[1m]))', 'Request', 'Service', NULL, true, false, 'SVC:{{$labels.xm_service_name}} Svc Active Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2022-03-11 15:51:45.946', '2022-03-11 15:51:45.946')
ON CONFLICT (id) DO UPDATE
    SET expr = EXCLUDED.expr;

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message, created_date, modified_date)
VALUES
    ('imxc_jspd_pod_active_txn_per_sec', 'Service Pod Active Transaction Count (per sec)', 'The number of active transaction counts per second for pod', 'sum by(xm_clst_id, xm_namespace, xm_service_name, xm_pod_id) (rate(imxc_txn_active_count{{filter}}[1m]))', 'Request', 'Service', NULL, true, false, 'SVC:{{$labels.xm_service_name}} Svc Pod Active Transaction count/Seconds:{{humanize $value}}|{threshold}.', '2022-03-11 15:53:29.252', '2022-03-11 15:53:29.252')
ON CONFLICT (id) DO UPDATE
    SET expr = EXCLUDED.expr;
-- public.agent_install_file_info
-- Replaces the manifest text stored under id = 2. The text defines, in order:
-- ClusterRole cloudmoa-cluster-role, ClusterRoleBinding cloudmoa-restricted-rb,
-- PodSecurityPolicy cloudmoa-psp and DaemonSet cloudmoa-topology-agent.
-- $CLOUDMOA_NAMESPACE, $DOCKER_REGISTRY_URL, $IMAGE_TAG, $CLOUDMOA_CLUSTER_ID and
-- the $COLLTION_SERVER_DATAGATE_* names are stored verbatim as placeholders.
-- Doubled single quotes ('') inside the text are SQL escapes for one quote.
-- Fix: the storageclasses rule named the API group "storage.j8s.io"; the
-- Kubernetes group is "storage.k8s.io".
-- NOTE(review): the ClusterRole grants "use" on the PodSecurityPolicy named
-- imxc-ps, while the policy defined in this same manifest is cloudmoa-psp.
-- Confirm which name is intended.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cloudmoa-cluster-role
rules:
- nonResourceURLs:
- "*"
verbs:
- get
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- services
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- nodes/stats
- endpoints
- namespaces
- events
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- deployments/scale
- replicasets
- replicasets/scale
- statefulsets
- statefulsets/scale
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- get
- list
- watch
- update
- apiGroups:
- batch
resources:
- cronjobs
verbs:
- get
- list
- update
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- get
- list
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- policy
resources:
- podsecuritypolicies
verbs:
- use
resourceNames:
- imxc-ps
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kube-apiserver-client-kubelet
resources:
- signers
verbs:
- approve
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kubelet-serving
resources:
- signers
verbs:
- approve
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- proxy
- apiGroups:
- ""
resources:
- nodes/log
- nodes/metrics
- nodes/proxy
- nodes/spec
- nodes/stats
verbs:
- ''*''
- apiGroups:
- ''*''
resources:
- ''*''
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloudmoa-restricted-rb
namespace: $CLOUDMOA_NAMESPACE
subjects:
- kind: ServiceAccount
name: default
namespace: $CLOUDMOA_NAMESPACE
roleRef:
kind: ClusterRole
name: cloudmoa-cluster-role
apiGroup: rbac.authorization.k8s.io
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: cloudmoa-psp
namespace: $CLOUDMOA_NAMESPACE
spec:
privileged: true
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
runAsUser:
rule: RunAsAny
fsGroup:
rule: RunAsAny
hostPorts:
- max: 65535
min: 0
hostNetwork: true
hostPID: true
volumes:
- configMap
- secret
- emptyDir
- hostPath
- projected
- downwardAPI
- persistentVolumeClaim
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-topology-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-topology-agent
spec:
selector:
matchLabels:
app: cloudmoa-topology-agent
template:
metadata:
labels:
app: cloudmoa-topology-agent
spec:
hostNetwork: true
hostPID: true
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- name: cloudmoa-topology-agent
image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG
imagePullPolicy: Always
resources:
requests:
cpu: 200m
memory: 512Mi
limits:
cpu: 500m
memory: 600Mi
securityContext:
privileged: true
volumeMounts:
- mountPath: /host/usr/bin
name: bin-volume
- mountPath: /var/run/docker.sock
name: docker-volume
- mountPath: /host/proc
name: proc-volume
- mountPath: /root
name: root-volume
- mountPath: /log
name: log-volume
env:
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: ROOT_DIRECTORY
value: /root
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_ID
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: LOG_LEVEL
value: "INFO"
volumes:
- name: bin-volume
hostPath:
path: /usr/bin
type: Directory
- name: docker-volume
hostPath:
path: /var/run/docker.sock
- name: proc-volume
hostPath:
path: /proc
- name: root-volume
hostPath:
path: /
- name: log-volume
hostPath:
path: /home'::text WHERE id = 2::bigint;
-- Replaces the manifest text stored under id = 3. The text defines ConfigMap
-- cloudmoa-metric-agent-config (scaling.rules plus metric-agent.yml with the
-- scrape jobs kubernetes-kubelet, kubernetes-node-exporter and
-- kubernetes-cadvisor) and Deployment cloudmoa-metric-agent, which mounts that
-- ConfigMap at /etc/metric-agent/.
-- $CLOUDMOA_NAMESPACE, $CLOUDMOA_CLUSTER_ID, $DOCKER_REGISTRY_URL, $IMAGE_TAG and
-- the $COLLTION_SERVER_DATAGATE_* names are stored verbatim as placeholders.
-- Doubled single quotes ('') inside the text are SQL escapes for one quote.
-- NOTE(review): the kubernetes-node-exporter keep-list does not contain
-- node_memory_SReclaimable_bytes, which the node/host memory expressions written
-- to public.metric_meta2 by this script reference. Confirm whether that series
-- reaches the backend through this job.
-- NOTE(review): the kubernetes-cadvisor job reads the labels pod_name and
-- container_name; confirm the target kubelet versions still expose them.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod_name]
target_label: xm_pod_id
- source_labels: [container_name]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container_name]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'::text WHERE id = 3::bigint;
-- Overwrite the install manifest stored in agent_install_file_info row id = 7.
-- The stored text is a v1 List with two items:
--   1. Deployment cloudmoa-trace-agent (1 replica, Recreate strategy,
--      runAsNonRoot with UID 65534, container ports UDP 5775/6831/6832 and TCP 5778).
--   2. ClusterIP Service cloudmoa-trace-agent exposing the same four ports.
-- The $CLOUDMOA_NAMESPACE, $DOCKER_REGISTRY_URL, $IMAGE_TAG, $CLOUDMOA_CLUSTER_ID and
-- $COLLTION_SERVER_DATAGATE_IP/_PORT tokens are stored verbatim in the row;
-- presumably they are substituted when the manifest is rendered (TODO confirm).
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: List
items:
- apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-trace-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-trace-agent
spec:
selector:
matchLabels:
app: cloudmoa-trace-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-trace-agent
spec:
securityContext:
runAsNonRoot: true
runAsUser: 65534
containers:
- image: $DOCKER_REGISTRY_URL/trace-agent:$IMAGE_TAG
name: cloudmoa-trace-agent
resources:
requests:
cpu: 100m
memory: 50Mi
limits:
cpu: 200m
memory: 100Mi
ports:
- containerPort: 5775
protocol: UDP
- containerPort: 6831
protocol: UDP
- containerPort: 6832
protocol: UDP
- containerPort: 5778
protocol: TCP
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- apiVersion: v1
kind: Service
metadata:
name: cloudmoa-trace-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-trace-agent
spec:
ports:
- name: agent-zipkin-thrift
port: 5775
protocol: UDP
targetPort: 5775
- name: agent-compact
port: 6831
protocol: UDP
targetPort: 6831
- name: agent-binary
port: 6832
protocol: UDP
targetPort: 6832
- name: agent-configs
port: 5778
protocol: TCP
targetPort: 5778
selector:
app: cloudmoa-trace-agent
type: ClusterIP'::text WHERE id = 7::bigint;
-- Overwrite the install manifest stored in agent_install_file_info row id = 4.
-- The stored text contains two YAML documents:
--   1. Service cloudmoa-node-exporter (clusterIP: None) exposing TCP 9110,
--      annotated prometheus.io/scrape: 'true'.
--   2. DaemonSet cloudmoa-node-exporter: hostNetwork + hostPID, tolerates every
--      NoSchedule/NoExecute taint, listens on :9110 (also bound as hostPort),
--      and mounts the host /proc, /sys and / under /host/*.
-- The node-exporter image reference carries no tag, unlike the other agent images
-- in these patches, which use :$IMAGE_TAG.
-- NOTE(review): confirm the untagged image is intentional.
-- Single quotes inside the literal are doubled ('') per SQL string escaping.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: ''true''
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
clusterIP: None
ports:
- name: scrape
port: 9110
protocol: TCP
selector:
app: cloudmoa-node-exporter
type: ClusterIP
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
selector:
matchLabels:
app: cloudmoa-node-exporter
template:
metadata:
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
spec:
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- image: $DOCKER_REGISTRY_URL/prom/node-exporter
name: cloudmoa-node-exporter
ports:
- containerPort: 9110
hostPort: 9110
name: scrape
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.tcpstat
- --web.listen-address=:9110
# --log.level=debug
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: false
- mountPath: /host/sys
name: sys
readOnly: false
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
hostNetwork: true
hostPID: true
securityContext:
runAsNonRoot: true
runAsUser: 65534
volumes:
- hostPath:
path: /proc
name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
'::text WHERE id = 4::bigint;

View File

@@ -0,0 +1,459 @@
-- Overwrite the install manifest stored in agent_install_file_info row id = 2.
-- The stored text contains four YAML documents:
--   1. ClusterRole cloudmoa-cluster-role (read access across API groups, plus
--      update on pods/services/jobs/cronjobs and CSR signer approval).
--   2. ClusterRoleBinding cloudmoa-restricted-rb, binding that role to the
--      "default" ServiceAccount in $CLOUDMOA_NAMESPACE.
--   3. PodSecurityPolicy cloudmoa-psp (privileged, hostNetwork, hostPID, hostPath).
--   4. DaemonSet cloudmoa-topology-agent (privileged, hostNetwork + hostPID,
--      mounts host /usr/bin, /var/run/docker.sock, /proc, / and /home).
-- Fix: the storageclasses rule used apiGroup "storage.j8s.io", which is not a
-- Kubernetes API group, so the rule granted nothing. StorageClass lives in
-- "storage.k8s.io".
-- NOTE(review): the ClusterRole grants "use" on PodSecurityPolicy "imxc-ps", while
-- the PSP defined in this same manifest is named "cloudmoa-psp" -- confirm whether
-- "imxc-ps" refers to a policy created elsewhere or should be "cloudmoa-psp".
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cloudmoa-cluster-role
rules:
- nonResourceURLs:
- "*"
verbs:
- get
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- services
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- nodes/stats
- endpoints
- namespaces
- events
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- deployments/scale
- replicasets
- replicasets/scale
- statefulsets
- statefulsets/scale
verbs:
- get
- list
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- get
- list
- watch
- update
- apiGroups:
- batch
resources:
- cronjobs
verbs:
- get
- list
- update
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- get
- list
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- policy
resources:
- podsecuritypolicies
verbs:
- use
resourceNames:
- imxc-ps
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kube-apiserver-client-kubelet
resources:
- signers
verbs:
- approve
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kubelet-serving
resources:
- signers
verbs:
- approve
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- proxy
- apiGroups:
- ""
resources:
- nodes/log
- nodes/metrics
- nodes/proxy
- nodes/spec
- nodes/stats
verbs:
- ''*''
- apiGroups:
- ''*''
resources:
- ''*''
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloudmoa-restricted-rb
namespace: $CLOUDMOA_NAMESPACE
subjects:
- kind: ServiceAccount
name: default
namespace: $CLOUDMOA_NAMESPACE
roleRef:
kind: ClusterRole
name: cloudmoa-cluster-role
apiGroup: rbac.authorization.k8s.io
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: cloudmoa-psp
namespace: $CLOUDMOA_NAMESPACE
spec:
privileged: true
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
runAsUser:
rule: RunAsAny
fsGroup:
rule: RunAsAny
hostPorts:
- max: 65535
min: 0
hostNetwork: true
hostPID: true
volumes:
- configMap
- secret
- emptyDir
- hostPath
- projected
- downwardAPI
- persistentVolumeClaim
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-topology-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-topology-agent
spec:
selector:
matchLabels:
app: cloudmoa-topology-agent
template:
metadata:
labels:
app: cloudmoa-topology-agent
spec:
hostNetwork: true
hostPID: true
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- name: cloudmoa-topology-agent
image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG
imagePullPolicy: Always
resources:
requests:
cpu: 200m
memory: 512Mi
limits:
cpu: 500m
memory: 600Mi
securityContext:
privileged: true
volumeMounts:
- mountPath: /host/usr/bin
name: bin-volume
- mountPath: /var/run/docker.sock
name: docker-volume
- mountPath: /host/proc
name: proc-volume
- mountPath: /root
name: root-volume
- mountPath: /log
name: log-volume
env:
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: ROOT_DIRECTORY
value: /root
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_ID
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: LOG_LEVEL
value: "INFO"
volumes:
- name: bin-volume
hostPath:
path: /usr/bin
type: Directory
- name: docker-volume
hostPath:
path: /var/run/docker.sock
- name: proc-volume
hostPath:
path: /proc
- name: root-volume
hostPath:
path: /
- name: log-volume
hostPath:
path: /home' WHERE id = 2;
-- Overwrite the install manifest stored in agent_install_file_info row id = 6.
-- The stored text contains two YAML documents:
--   1. ConfigMap cloudmoa-metric-agent-config with two entries:
--        scaling.rules    - a single ScaleUpRule alert (open sessions > 15);
--        metric-agent.yml - a 15s scrape config with three jobs:
--          kubernetes-kubelet        (via the API server proxy path, keeps only
--                                     kubelet_running_pod_count),
--          kubernetes-node-exporter  (rewrites the :10250 node address to :9110,
--                                     keeps an allow-list of node_* metrics),
--          kubernetes-cadvisor       (/metrics/cadvisor, keeps an allow-list of
--                                     container_* metrics).
--   2. Deployment cloudmoa-metric-agent (1 replica, Recreate strategy) that mounts
--      the ConfigMap at /etc/metric-agent/.
-- In this version the cadvisor job copies the `pod` and `container` labels into
-- xm_pod_id / xm_cont_name and drops series whose `container` label is empty;
-- the Deployment also sets LOG_MAXAGE="1".
-- Single quotes inside the literal are doubled ('') per SQL string escaping.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod]
target_label: xm_pod_id
- source_labels: [container]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: LOG_MAXAGE
value: "1"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
' WHERE id = 6;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,8 @@
-- Give the admin account the tenant-owner attribute.
UPDATE cloud_user
   SET is_tenant_owner = true
 WHERE user_id = 'admin';

-- Migrate dependencies on 'owner' over to 'admin' (run only if needed).
UPDATE auth_resource3
   SET name = replace(name, 'owner', 'admin')
 WHERE name LIKE '%|owner|%';

-- CLOUD-2305: apply the patch that removes node_memory_SReclaimable_bytes from the
-- node_memory_used metric_meta expression.
UPDATE metric_meta2
   SET expr = '((node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - (node_memory_MemFree_bytes{xm_entity_type="Node", {filter}} + node_memory_Cached_bytes{xm_entity_type="Node", {filter}} + node_memory_Buffers_bytes{xm_entity_type="Node", {filter}})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node", {filter}} - node_memory_MemFree_bytes{xm_entity_type="Node", {filter}}) / 1024 / 1024 / 1024'
 WHERE id = 'node_memory_used';

View File

@@ -0,0 +1,361 @@
-- agent_install_file_info
-- Overwrite the install manifest stored in row id = 3.
-- The stored text contains two YAML documents:
--   1. ConfigMap cloudmoa-metric-agent-config (scaling.rules + metric-agent.yml with
--      the kubernetes-kubelet, kubernetes-node-exporter and kubernetes-cadvisor jobs).
--   2. Deployment cloudmoa-metric-agent (1 replica, Recreate strategy) mounting the
--      ConfigMap at /etc/metric-agent/.
-- In this version the node-exporter allow-list includes
-- node_memory_SReclaimable_bytes, and the cadvisor job reads the `pod_name` /
-- `container_name` labels (dropping series whose `container_name` is empty).
-- Single quotes inside the literal are doubled ('') per SQL string escaping.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod_name]
target_label: xm_pod_id
- source_labels: [container_name]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container_name]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'::text WHERE id = 3::bigint;
-- Overwrite the install manifest stored in agent_install_file_info row id = 6.
-- The stored text contains two YAML documents:
--   1. ConfigMap cloudmoa-metric-agent-config (scaling.rules + metric-agent.yml with
--      the kubernetes-kubelet, kubernetes-node-exporter and kubernetes-cadvisor jobs).
--   2. Deployment cloudmoa-metric-agent (1 replica, Recreate strategy) mounting the
--      ConfigMap at /etc/metric-agent/.
-- In this version the node-exporter allow-list includes
-- node_memory_SReclaimable_bytes, the cadvisor job reads the `pod` / `container`
-- labels (dropping series whose `container` is empty), and the Deployment sets
-- LOG_MAXAGE="1".
-- Single quotes inside the literal are doubled ('') per SQL string escaping.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod]
target_label: xm_pod_id
- source_labels: [container]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [container]
regex: (.+)
action: keep
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: LOG_MAXAGE
value: "1"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'::text WHERE id = 6::bigint;
-- CLOUD-2798: update the pod_phase_count_by_cluster metric_meta expression.
UPDATE metric_meta2
   SET expr = 'count by(xm_clst_id, pod_state) (sum by (xm_clst_id, xm_pod_id, pod_state)(rate(imxc_kubernetes_container_resource_limit_cpu{{filter}}[1m])))'
 WHERE id = 'pod_phase_count_by_cluster';

-- Update the node_memory_usage expression.
UPDATE metric_meta2
   SET expr = 'sum by (xm_node_id)((node_memory_MemTotal_bytes{xm_entity_type="Node"}- (node_memory_MemFree_bytes{xm_entity_type="Node"} + node_memory_Cached_bytes{xm_entity_type="Node"} + node_memory_Buffers_bytes{xm_entity_type="Node"})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node"}- node_memory_MemFree_bytes{xm_entity_type="Node"}) / (sum by (xm_node_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})) * 100'
 WHERE id = 'node_memory_usage';

View File

@@ -0,0 +1,360 @@
-- CLOUD-3473: fix the memory-capacity lookup query.
UPDATE metric_meta2
   SET description = 'imxc_kubernetes_node_resource_capacity_memory',
       expr = 'sum by (xm_clst_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})'
 WHERE id = 'cluster_memory_capacity';

-- Rename module code values so they match the owner_name used in metric data.
UPDATE common_setting
   SET code_value = 'cmoa-collector'
 WHERE code_id = 'Cloudmoa Collector';

UPDATE common_setting
   SET code_value = 'imxc-api'
 WHERE code_id = 'Api Server';

UPDATE common_setting
   SET code_value = 'imxc-ui'
 WHERE code_id = 'Ui Server';

UPDATE common_setting
   SET code_value = 'cloudmoa-trace-agent'
 WHERE code_id = 'Trace Agent';
-- CLOUD-4795: container network metrics could not be collected in containerd
-- environments.
-- 22.10.08: included in release 3.4.6 in response to a Hyundai Card case.
--
-- Overwrites the install manifest stored in agent_install_file_info row id = 6
-- with a ConfigMap (cloudmoa-metric-agent-config) and a Deployment
-- (cloudmoa-metric-agent). In this version the kubernetes-cadvisor job's
-- metric_relabel_configs contain only label copies and the __name__ allow-list;
-- there is no keep rule on the `container` label, so series with an empty
-- `container` label are not dropped at this stage.
-- NOTE(review): presumably that omission is the CLOUD-4795 fix -- confirm.
-- Single quotes inside the literal are doubled ('') per SQL string escaping.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod]
target_label: xm_pod_id
- source_labels: [container]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: LOG_MAXAGE
value: "1"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'::text WHERE id = 6::bigint;
-- Overwrite the install manifest stored in agent_install_file_info.yaml for row id 3.
-- The new value is a two-document YAML text:
--   1. ConfigMap cloudmoa-metric-agent-config (keys scaling.rules and metric-agent.yml)
--   2. Deployment cloudmoa-metric-agent, which mounts that ConfigMap at /etc/metric-agent/
-- metric-agent.yml defines three scrape jobs: kubernetes-kubelet,
-- kubernetes-node-exporter and kubernetes-cadvisor. In this variant the cadvisor
-- job copies the pod_name / container_name labels into xm_pod_id / xm_cont_name.
-- Single quotes inside the literal are doubled ('') as required by SQL string syntax.
-- NOTE(review): the $CLOUDMOA_*, $DOCKER_REGISTRY_URL, $IMAGE_TAG and $COLLTION_*
-- tokens are stored verbatim; presumably they are substituted when the manifest
-- is rendered for installation - confirm against the consumer.
UPDATE public.agent_install_file_info SET yaml = '---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod_name]
target_label: xm_pod_id
- source_labels: [container_name]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config'::text WHERE id = 3::bigint;

View File

@@ -0,0 +1,102 @@
-- CLOUD-4752: fix the query used by the node_memory_usage alert.
-- Used memory is MemTotal - (MemFree + Cached + Buffers) when that is >= 0,
-- otherwise MemTotal - MemFree; the result is divided by the node memory
-- capacity and scaled to a percentage.
UPDATE metric_meta2
   SET expr = 'sum by (xm_clst_id, xm_node_id)((node_memory_MemTotal_bytes{xm_entity_type="Node"}- (node_memory_MemFree_bytes{xm_entity_type="Node"} + node_memory_Cached_bytes{xm_entity_type="Node"} + node_memory_Buffers_bytes{xm_entity_type="Node"})) >= 0 or node_memory_MemTotal_bytes{xm_entity_type="Node"}- node_memory_MemFree_bytes{xm_entity_type="Node"}) / (sum by (xm_clst_id, xm_node_id) (imxc_kubernetes_node_resource_capacity_memory{{filter}})) * 100'
 WHERE id = 'node_memory_usage';
-- CLOUD-6474 node-exporter | set GOMAXPROCS
-- Auto-generated SQL script #202211241543
-- Overwrite the install manifest stored in agent_install_file_info.yaml for row id 4.
-- The new value is a two-document YAML text: a headless Service (clusterIP: None)
-- and a DaemonSet, both named cloudmoa-node-exporter, exposing port 9110.
-- The DaemonSet container sets the GOMAXPROCS environment variable to "1" and
-- mounts the host /proc, /sys and / paths under /host/*.
-- Single quotes inside the literal are doubled ('') as required by SQL string syntax.
UPDATE public.agent_install_file_info
SET yaml='---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: ''true''
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
clusterIP: None
ports:
- name: scrape
port: 9110
protocol: TCP
selector:
app: cloudmoa-node-exporter
type: ClusterIP
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-node-exporter
namespace: $CLOUDMOA_NAMESPACE
spec:
selector:
matchLabels:
app: cloudmoa-node-exporter
template:
metadata:
labels:
app: cloudmoa-node-exporter
name: cloudmoa-node-exporter
spec:
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- image: $DOCKER_REGISTRY_URL/node-exporter
name: cloudmoa-node-exporter
ports:
- containerPort: 9110
hostPort: 9110
name: scrape
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.tcpstat
- --web.listen-address=:9110
# --log.level=debug
env:
- name: GOMAXPROCS
value: "1"
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: false
- mountPath: /host/sys
name: sys
readOnly: false
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
hostNetwork: true
hostPID: true
securityContext:
runAsNonRoot: true
runAsUser: 65534
volumes:
- hostPath:
path: /proc
name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
'
WHERE id=4;

View File

@@ -0,0 +1,387 @@
-- CLOUD-6526: fix host-related queries.
-- Each expression falls back from a 1m rate to a 5m rate so that gaps between
-- collected samples do not leave the chart without data.

-- Network receive/transmit bytes per instance, tagged via the data_type label.
UPDATE metric_meta2
   SET expr = 'sum by (data_type, instance) (
label_replace(rate(node_network_receive_bytes_total{{filter}}[1m]) or rate(node_network_receive_bytes_total{{filter}}[5m]), "data_type", "Receive", "", "") or
label_replace(rate(node_network_transmit_bytes_total{{filter}}[1m]) or rate(node_network_transmit_bytes_total{{filter}}[5m]), "data_type", "Transmit", "", "") )'
 WHERE id = 'host_network_io_byte';

-- Disk read/written bytes per instance, tagged via the data_type label.
UPDATE public.metric_meta2
   SET expr = 'sum by (data_type, instance) (
label_replace(rate(node_disk_read_bytes_total{{filter}}[1m]) or rate(node_disk_read_bytes_total{{filter}}[5m]), "data_type", "Read", "", "") or
label_replace(rate(node_disk_written_bytes_total{{filter}}[1m]) or rate(node_disk_written_bytes_total{{filter}}[5m]), "data_type", "Write", "", "") )'
 WHERE id = 'host_disk_read_write_byte';

-- Disk IOPS per instance: completed reads plus completed writes.
UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (
(rate(node_disk_reads_completed_total{{filter}}[1m]) + rate(node_disk_writes_completed_total{{filter}}[1m])) or
(rate(node_disk_reads_completed_total{{filter}}[5m]) + rate(node_disk_writes_completed_total{{filter}}[5m])))'
 WHERE id = 'host_disk_iops';
-- CLOUD-8671 Metric-Agent | add data filtering settings
-- Fixes resource usage appearing roughly doubled for Docker runtime environments
-- on screens such as Workload > Pod.
-- Overwrite the install manifest stored in agent_install_file_info.yaml for row id 3
-- (ConfigMap cloudmoa-metric-agent-config + Deployment cloudmoa-metric-agent).
-- This variant keeps the pod_name / container_name label mapping in the cadvisor
-- job and appends two "drop" rules at the end of its metric_relabel_configs:
--   * __name__@image matching "container_cpu.*@"
--   * __name__@name  matching "container_memory.*@"
-- NOTE(review): assuming Prometheus-style fully anchored relabel regexes, these
-- drop container_cpu* series with an empty image label and container_memory*
-- series with an empty name label - confirm against the metric-agent implementation.
-- Single quotes inside the literal are doubled ('') as required by SQL string syntax.
UPDATE public.agent_install_file_info
SET yaml='---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod_name]
target_label: xm_pod_id
- source_labels: [container_name]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
- source_labels: [ __name__, image ]
separator: "@"
regex: "container_cpu.*@"
action: drop
- source_labels: [ __name__, name ]
separator: "@"
regex: "container_memory.*@"
action: drop
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'
WHERE id=3;
-- Overwrite the install manifest stored in agent_install_file_info.yaml for row id 6
-- (ConfigMap cloudmoa-metric-agent-config + Deployment cloudmoa-metric-agent).
-- Compared with the id=3 manifest in this same script, this variant:
--   * maps the pod / container labels (not pod_name / container_name) into
--     xm_pod_id / xm_cont_name in the cadvisor job;
--   * sets an additional LOG_MAXAGE="1" environment variable on the Deployment.
-- It carries the same two trailing "drop" rules on __name__@image
-- ("container_cpu.*@") and __name__@name ("container_memory.*@").
-- Single quotes inside the literal are doubled ('') as required by SQL string syntax.
UPDATE public.agent_install_file_info
SET yaml='---
apiVersion: v1
kind: ConfigMap
metadata:
name: cloudmoa-metric-agent-config
namespace: $CLOUDMOA_NAMESPACE
data:
scaling.rules: |
groups:
- name: scaleup
rules :
- alert : ScaleUpRule
expr: job:webapp_config_open_sessions_current_count:sum > 15
annotations:
summary: "Scale up when current sessions is greater than 15"
description: "Firing when total sessions active greater than 15"
metric-agent.yml: |
global:
scrape_interval: 15s
scrape_configs:
- job_name: ''kubernetes-kubelet''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- target_label: xm_entity_type
replacement: ''Node''
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (kubelet_running_pod_count)
action: keep
- job_name: ''kubernetes-node-exporter''
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: ''(.*):10250''
replacement: ''${1}:9110''
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
# set "name" value to "job"
- source_labels: [job]
regex: ''kubernetes-(.*)''
replacement: ''${1}''
target_label: name
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Node''
- source_labels: [__meta_kubernetes_namespace]
separator: ;
regex: (.*)
target_label: xm_namespace
replacement: $1
action: replace
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: (node_memory_SReclaimable_bytes|node_boot_time_seconds|node_context_switches_total|node_cpu_frequency_max_hertz|node_cpu_package_throttles_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filesystem_avail_bytes|node_filesystem_free_bytes|node_filesystem_size_bytes|node_load1|node_load15|node_load5|node_memory_Active_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SwapCached_bytes|node_memory_SwapFree_bytes|node_memory_SwapTotal_bytes|node_network_receive_bytes_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_transmit_bytes_total)
action: keep
- job_name: ''kubernetes-cadvisor''
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
metrics_path: /metrics/cadvisor
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: xm_clst_id
replacement: ''$CLOUDMOA_CLUSTER_ID''
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: xm_node_id
- target_label: xm_entity_type
replacement: ''Container''
metric_relabel_configs:
- source_labels: [namespace]
target_label: xm_namespace
- source_labels: [pod]
target_label: xm_pod_id
- source_labels: [container]
target_label: xm_cont_name
- source_labels: [id]
target_label: xm_cont_id
- source_labels: [ __name__ ]
regex: (container_cpu_cfs_throttled_seconds_total|container_cpu_system_seconds_total|container_cpu_usage_seconds_total|container_cpu_user_seconds_total|container_fs_limit_bytes|container_fs_reads_bytes_total|container_fs_usage_bytes|container_fs_writes_bytes_total|container_last_seen|container_memory_cache|container_memory_max_usage_bytes|container_memory_swap|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_memory_limit_bytes)
action: keep
- source_labels: [ __name__, image ]
separator: "@"
regex: "container_cpu.*@"
action: drop
- source_labels: [ __name__, name ]
separator: "@"
regex: "container_memory.*@"
action: drop
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cloudmoa-metric-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-metric-agent
spec:
selector:
matchLabels:
app: cloudmoa-metric-agent
replicas: 1
strategy:
type: Recreate
template:
metadata:
labels:
app: cloudmoa-metric-agent
spec:
containers:
- name: cloudmoa-metric-agent
image: $DOCKER_REGISTRY_URL/metric-agent:$IMAGE_TAG
args:
- --config.file=/etc/metric-agent/metric-agent.yml
resources:
requests:
cpu: 100m
memory: 512Mi
limits:
cpu: 300m
memory: 1000Mi
volumeMounts:
- mountPath: /etc/metric-agent/
name: config-volume
env:
- name: LOG_LEVEL
value: "INFO"
- name: LOG_MAXAGE
value: "1"
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: STORAGE_TYPE
value: datagate
restartPolicy: Always
volumes:
- name: config-volume
configMap:
name: cloudmoa-metric-agent-config
'
WHERE id=6;

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,48 @@
-- CLOUD-16405 Metric Meta | fix dashboard-related metrics
-- Adds dashboard-related metrics and fixes typos.

-- Redis replication metrics (slave lag, slave offset, connected slave count).
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES
('redis_connected_slave_lag_seconds','Redis Connected Slave Lag Seconds','Redis Connected Slave Lag Seconds','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, slave_ip, slave_port) (rate(redis_connected_slave_lag_seconds[1m]))','Worker','Redis',NULL,true,true,'Redis Connected Slave Lag Seconds','2023-03-16 14:39:57.420','2023-03-16 14:39:57.420'),
('redis_connected_slave_offset_bytes','Redis Connected Slave Offset Bytes','Redis Connected Slave Offset Bytes','sum by (xm_clst_id, xm_namespace, xm_node_id, instance, slave_ip, slave_port) (rate(redis_connected_slave_offset_bytes[1m]))','Worker','Redis',NULL,true,true,'Redis Connected Slave Offset Bytes','2023-03-16 14:37:43.734','2023-03-16 14:38:22.164'),
('redis_connected_slaves','Redis Connected Slaves','Redis Connected Slaves','sum by (xm_clst_id, xm_namespace, xm_node_id, instance) (redis_connected_slaves)','Worker','Redis',NULL,true,true,'Redis Connected Slaves','2023-03-16 13:57:09.423','2023-03-16 13:59:50.746');
-- Traefik 4xx/5xx request counts over the last 10 minutes and the last hour.
-- The alert message text spells the product name "Traefik" (it previously read "Trafik").
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES
('traefik_http_requests_bad_total_last_10m','Traefik HTTP Requests Bad Total Last 10m','Number bad requests in 10 minutes','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}|5[0-9]{2}"}[10m]))','Request','Traefik','service',true,false,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Traefik Bad Request Count (10m) :{{humanize $value}}|{threshold}.','2023-03-17 14:10:13.163','2023-03-17 14:10:13.163'),
('traefik_http_requests_bad_total_last_1h','Traefik HTTP Requests Bad Total Last 1h','Number bad requests in 1 hour','sum(increase(traefik_service_requests_total{code=~"4[0-9]{2}|5[0-9]{2}"}[1h]))','Request','Traefik','service',true,false,'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} POD:{{$labels.xm_pod_id}} Traefik Bad Request Count (1h) :{{humanize $value}}|{threshold}.','2023-03-17 14:10:13.163','2023-03-17 14:10:13.163');
-- Traefik request count over 10 minutes, grouped by HTTP status code.
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message,created_date,modified_date) VALUES
('traefik_status_code_count','Traefik Status Code Count ','Traefik Status Code Count ','sum(increase(traefik_service_requests_total{protocol=~"http|https"}[10m])) by (code)','Request','Traefik','code',true,false,'None','2023-03-17 14:33:13.020','2023-03-17 14:33:13.020');
-- Auto-generated SQL script #202305081122
-- Set the display name (meta_name) of the traefik_http_requests_5xx_total_last_1h metric.
UPDATE public.metric_meta2
   SET meta_name = 'Traefik HTTP Requests 5xx Total Last 1h'
 WHERE id = 'traefik_http_requests_5xx_total_last_1h';
-- CLOUD-16467 Metric Meta | add a table that stores Secret information.
-- One row per (kube_flatting_time, cluster_id, kind, metadata_uid, row_index);
-- create_time defaults to the insertion time.
CREATE TABLE cmoa_secret_base (
    kube_flatting_time          BIGINT       NOT NULL,
    cluster_id                  VARCHAR(255) NOT NULL,
    kind                        VARCHAR(30)  NOT NULL,
    metadata_uid                VARCHAR(40)  NOT NULL,
    row_index                   INTEGER      NOT NULL,
    metadata_name               TEXT,
    kind_status                 VARCHAR(50),
    metadata_resourceversion    TEXT,
    metadata_annotations        TEXT,
    metadata_creationtimestamp  VARCHAR(25),
    metadata_labels             TEXT,
    metadata_namespace          TEXT,
    data                        TEXT,
    type                        TEXT,
    create_time                 TIMESTAMP DEFAULT now(),
    PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index)
);
-- Add a text column for the pod volume's persistentVolumeClaim spec.
ALTER TABLE cmoa_pod_volume ADD COLUMN spec_volumes_persistentvolumeclaim TEXT;
-- CLOUD-16396: initialize the menu entries needed for the scale / yaml
-- permissions under Setting > Deploy List.

-- Register the three Deploy List auth resources.
INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id)
VALUES ('menu|Workloads|Deploy List|Scale', false, NULL);

INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id)
VALUES ('menu|Workloads|Deploy List|Yaml', false, NULL);

INSERT INTO public.auth_resource3 (name, is_deleted, tenant_id)
VALUES ('menu|Workloads|Deploy List|List', false, NULL);

-- Create menu_meta rows 13-15, each linked to its auth resource by name lookup.
INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level)
VALUES (13, 'deployList(List)', NULL, 5, '',
        (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Deploy List|List'), 0);

INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level)
VALUES (14, 'deployList(Scale)', NULL, 6, '',
        (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Deploy List|Scale'), 0);

INSERT INTO public.menu_meta (id, description, icon, "position", url, auth_resource3_id, scope_level)
VALUES (15, 'deployList(Yaml)', NULL, 7, '',
        (SELECT id FROM auth_resource3 WHERE name = 'menu|Workloads|Deploy List|Yaml'), 0);

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,385 @@
-- CLOUD-19295 | add metric meta entries for host processes.
-- Every expression aggregates a namedprocess_namegroup_* series by groupname.

-- CPU seconds rate, scaled by 100.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES
    ('host_process_cpu_second_total', 'Host Process CPU Second Total (%)', 'CPU user usage in seconds ( % )', 'sum by (groupname) (rate(namedprocess_namegroup_cpu_seconds_total{{filter}}[1m]))*100', 'Process', 'Host', '', 'true', 'false', '"Host:{{$labels.instance}} Process CPU :{{humanize $value}}|{threshold}."');

-- Memory with memtype="resident", divided by 1048576.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES
    ('host_process_memory_bytes', 'Host Process memory bytes (MiB)', 'number of bytes of memory in use / 1048576', 'sum by (groupname) ((namedprocess_namegroup_memory_bytes{ memtype="resident", {filter}})) / 1048576', 'Process', 'Host', '', 'true', 'false', '"Host:{{$labels.instance}} Process memory :{{humanize $value}}MiB|{threshold}MiB."');

-- Read bytes rate, divided by 1024.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES
    ('host_process_read_bytes', 'Host Process read bytes ( KiB)', 'number of bytes read by this group /1024', 'sum by (groupname) (rate(namedprocess_namegroup_read_bytes_total{{filter}}[1m]))/1024', 'Process', 'Host', '', 'true', 'false', '"Host:{{$labels.instance}} Disk Read Size:{{humanize $value}}KiB|{threshold}KiB."');

-- Written bytes rate, divided by 1024.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES
    ('host_process_write_bytes', 'Host Process write bytes(KiB)', 'number of bytes written by this group/1024', 'sum by (groupname) (rate(namedprocess_namegroup_write_bytes_total{{filter}}[1m]))/1024', 'Process', 'Host', '', 'true', 'false', '"Host:{{$labels.instance}} Disk Write Size:{{humanize $value}}KiB|{threshold}KiB."');

-- Number of processes in the group.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES
    ('host_process_count', 'Host Process number', 'number of processes in this group', 'sum by (groupname) (namedprocess_namegroup_num_procs{{filter}})', 'Process', 'Host', '', 'true', 'false', '"Host:{{$labels.instance}} Count:{{humanize $value}}|{threshold}."');
-- CLOUD-19283: merge the node exporter into the Topology Agent.
-- Auto-generated SQL script #202307261024
-- Reduce the id=4 manifest to an empty YAML document and mark the row as not in use.
UPDATE public.agent_install_file_info
   SET yaml = '---',
       use_yn = false
 WHERE id = 4;
UPDATE public.agent_install_file_info
SET yaml='---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cloudmoa-cluster-role
rules:
- nonResourceURLs:
- "*"
verbs:
- get
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- services
verbs:
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- nodes/stats
- endpoints
- namespaces
- events
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
- daemonsets
- deployments
- deployments/scale
- replicasets
- replicasets/scale
- statefulsets
- statefulsets/scale
verbs:
- get
- list
- watch
- update
- apiGroups:
- batch
resources:
- jobs
verbs:
- get
- list
- watch
- update
- apiGroups:
- batch
resources:
- cronjobs
verbs:
- get
- list
- update
- apiGroups:
- storage.j8s.io
resources:
- storageclasses
verbs:
- get
- list
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- apiGroups:
- policy
resources:
- podsecuritypolicies
verbs:
- use
resourceNames:
- imxc-ps
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kube-apiserver-client-kubelet
resources:
- signers
verbs:
- approve
- apiGroups:
- certificates.k8s.io
resourceNames:
- kubernetes.io/kubelet-serving
resources:
- signers
verbs:
- approve
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- proxy
- apiGroups:
- ""
resources:
- nodes/log
- nodes/metrics
- nodes/proxy
- nodes/spec
- nodes/stats
verbs:
- ''*''
- apiGroups:
- ''*''
resources:
- ''*''
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloudmoa-restricted-rb
namespace: $CLOUDMOA_NAMESPACE
subjects:
- kind: ServiceAccount
name: default
namespace: $CLOUDMOA_NAMESPACE
roleRef:
kind: ClusterRole
name: cloudmoa-cluster-role
apiGroup: rbac.authorization.k8s.io
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: cloudmoa-psp
namespace: $CLOUDMOA_NAMESPACE
spec:
privileged: true
seLinux:
rule: RunAsAny
supplementalGroups:
rule: RunAsAny
runAsUser:
rule: RunAsAny
fsGroup:
rule: RunAsAny
hostPorts:
- max: 65535
min: 0
hostNetwork: true
hostPID: true
volumes:
- configMap
- secret
- emptyDir
- hostPath
- projected
- downwardAPI
- persistentVolumeClaim
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloudmoa-topology-agent
namespace: $CLOUDMOA_NAMESPACE
labels:
app: cloudmoa-topology-agent
spec:
selector:
matchLabels:
app: cloudmoa-topology-agent
template:
metadata:
labels:
app: cloudmoa-topology-agent
spec:
hostNetwork: true
hostPID: true
tolerations:
- effect: NoSchedule
operator: Exists
- effect: NoExecute
operator: Exists
containers:
- image: $DOCKER_REGISTRY_URL/node-exporter
name: node-agent
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 125m
memory: 90Mi
ports:
- containerPort: 9110
hostPort: 9110
name: scrape
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.tcpstat
- --web.listen-address=:9110
# --log.level=debug
env:
- name: GOMAXPROCS
value: "1"
volumeMounts:
- mountPath: /host/proc
name: proc-volume
readOnly: false
- mountPath: /host/sys
name: sys-volume
readOnly: false
- mountPath: /host/root
mountPropagation: HostToContainer
name: root-volume
readOnly: true
- name: cloudmoa-topology-agent
image: $DOCKER_REGISTRY_URL/topology-agent:$IMAGE_TAG
resources:
requests:
cpu: 200m
memory: 512Mi
limits:
cpu: 500m
memory: 600Mi
securityContext:
privileged: true
volumeMounts:
- mountPath: /host/usr/bin
name: bin-volume
- mountPath: /var/run/docker.sock
name: docker-volume
- mountPath: /host/proc
name: proc-volume
- mountPath: /root
name: root-volume
- mountPath: /log
name: log-volume
env:
- name: DATAGATE
value: $COLLTION_SERVER_DATAGATE_IP:$COLLTION_SERVER_DATAGATE_PORT
- name: CLUSTER_ID
value: $CLOUDMOA_CLUSTER_ID
- name: ROOT_DIRECTORY
value: /root
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_ID
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: LOG_LEVEL
value: "INFO"
volumes:
- name: bin-volume
hostPath:
path: /usr/bin
type: Directory
- name: docker-volume
hostPath:
path: /var/run/docker.sock
- name: proc-volume
hostPath:
path: /proc
- name: root-volume
hostPath:
path: /
- hostPath:
path: /sys
name: sys-volume
- name: log-volume
hostPath:
path: /home'
WHERE id=2;
-- CLOUD-19460: add the {filter} placeholder to the Nginx metric expressions.
-- Metric meta entries used by the Nginx Ingress dashboard.
-- Every row queries the Prometheus series that carries the same name as the
-- row id; the only exception is nginx_ingress_nginx_http_requests_second,
-- which is the 1-minute rate of nginx_ingress_nginx_http_requests_total.
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_up{ {filter} })' WHERE id ='nginx_ingress_nginx_up';
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_connections_active{ {filter} })' WHERE id ='nginx_ingress_nginx_connections_active';
-- Fixed: this row previously queried nginx_reload_errors_total (a copy of the
-- next row) instead of the last-reload-status series named by its id.
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_last_reload_status{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_last_reload_status';
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_reload_errors_total{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_reload_errors_total';
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_controller_nginx_reloads_total{ {filter} })' WHERE id ='nginx_ingress_controller_nginx_reloads_total';
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (nginx_ingress_nginx_http_requests_total{ {filter} })' WHERE id ='nginx_ingress_nginx_http_requests_total';
UPDATE public.metric_meta2 SET expr = 'sum by (xm_clst_id, xm_namespace, xm_node_id, instance, class) (rate(nginx_ingress_nginx_http_requests_total{ {filter} }[1m]))' WHERE id ='nginx_ingress_nginx_http_requests_second';
-- Metric meta entries used by the Nginx dashboard.
-- Each expression aggregates by instance and carries the {filter} placeholder
-- inside the series selector; the counter-based rows wrap the series in irate().
UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (nginx_up{ {filter} })'
 WHERE id = 'nginx_up';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (irate(nginx_connections_accepted{ {filter} }[5m]))'
 WHERE id = 'nginx_connections_accepted';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (irate(nginx_connections_handled{ {filter} }[5m]))'
 WHERE id = 'nginx_connections_handled';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (nginx_connections_active{ {filter} })'
 WHERE id = 'nginx_connections_active';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (nginx_connections_reading{ {filter} })'
 WHERE id = 'nginx_connections_reading';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (nginx_connections_waiting{ {filter} })'
 WHERE id = 'nginx_connections_waiting';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (nginx_connections_writing{ {filter} })'
 WHERE id = 'nginx_connections_writing';

UPDATE public.metric_meta2
   SET expr = 'sum by (instance) (irate(nginx_http_requests_total{ {filter} }[1m]))'
 WHERE id = 'nginx_http_requests_total';
-- Add metric meta entries for Kubernetes events.
-- One row per involved object kind (Node, Job, CronJob, ...) plus one row that
-- selects every event of type "Warning". Each expr counts
-- imxc_kubernetes_event_in_last_min grouped by cluster, namespace, entity type,
-- involved kind/name, reason and type. The message column is a template whose
-- fields are read through $labels.
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_node','Kubernetes Node Event','Kubernetes Node Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Node"})','Event','Node','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_job','Kubernetes Job Event','Kubernetes Job Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Job"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_cronjob','Kubernetes CronJob Event','Kubernetes CronJob Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="CronJob"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_daemonset','Kubernetes DaemonSet Event','Kubernetes DaemonSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="DaemonSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_deployment','Kubernetes Deployment Event','Kubernetes Deployment Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Deployment"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_endpoints','Kubernetes Endpoints Event','Kubernetes Endpoints Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Endpoints"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_persistentvolumeclaim','Kubernetes PersistentVolumeClaim Event','Kubernetes PersistentVolumeClaim Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="PersistentVolumeClaim"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_pod','Kubernetes Pod Event','Kubernetes Pod Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="Pod"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
-- Fixed: the Name field previously read {{$involved_name}}, which is not how
-- any other message in this script addresses a label; it now goes through
-- $labels like the rest.
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_warning','Kubernetes Warning Event','Kubernetes Warning Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{type="Warning"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Name: {{$labels.involved_name}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_replicaset','Kubernetes ReplicaSet Event','Kubernetes ReplicaSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="ReplicaSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
INSERT INTO public.metric_meta2 (id,meta_name,description,expr,resource_type,entity_type,groupby_keys,in_use,anomaly_score,message) VALUES('kube_event_statefulset','Kubernetes StatefulSet Event','Kubernetes StatefulSet Event','count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="StatefulSet"})','Event','Workload','','true','false','CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} ');
-- Add metric meta entries for event alerts.
-- Rows are keyed by event reason (SuccessfulCreate, SuccessfulDelete,
-- DeadlineExceeded), by involved kind (Job, CronJob) or by event type
-- (Normal, Warning). All of them count imxc_kubernetes_event_in_last_min and
-- include the {filter} placeholder in the selector. The two type-based rows do
-- not group by entity_type, unlike the other five.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_successful_create',
    'Event Alert SuccessfulCreate',
    'Event Alert SuccessfulCreate',
    'count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{reason=~"SuccessfulCreate", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_successful_delete',
    'Event Alert SuccessfulDelete',
    'Event Alert SuccessfulDelete',
    'count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{reason=~"SuccessfulDelete", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_deadline_exceeded',
    'Event Alert DeadlineExceeded',
    'Event Alert DeadlineExceeded',
    'count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{reason=~"DeadlineExceeded", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_job',
    'Event Alert Job',
    'Event Alert Job',
    'count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{involved_kind=~"Job", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_cronjob',
    'Event Alert CronJob',
    'Event Alert CronJob',
    'count by (xm_clst_id, xm_namespace, entity_type, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{involved_kind=~"CronJob", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_normal',
    'Event Alert Normal',
    'Event Alert Normal',
    'count by (xm_clst_id, xm_namespace, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{type=~"Normal", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'event_alert_warning',
    'Event Alert Warning',
    'Event Alert Warning',
    'count by (xm_clst_id, xm_namespace, involved_kind, involved_name, reason, type) (imxc_kubernetes_event_in_last_min{type=~"Warning", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} Name : {{$labels.involved_name}} '
);
-- Switch the node-export references over to node-agent.
-- The four metric_meta2 expressions and alert rule 97 select
-- node_cpu_seconds_total with name="node-agent"; agent install file 4 is
-- renamed to node-agent as well.
UPDATE public.metric_meta2
   SET expr = '(100 - (avg by (xm_clst_id, xm_node_id, xm_entity_type)(clamp_max(rate(node_cpu_seconds_total{ name="node-agent", mode="idle", xm_entity_type="Node", {filter} }[1m]),1.0) * 100)))'
 WHERE id = 'node_cpu_usage';

UPDATE public.metric_meta2
   SET expr = '(100 - (avg by (xm_clst_id)(clamp_max(rate(node_cpu_seconds_total{ name="node-agent", mode="idle", xm_entity_type="Node", {filter} }[1m]),1.0)) * 100))'
 WHERE id = 'cluster_cpu_usage';

UPDATE public.metric_meta2
   SET expr = '(100 - (avg by (xm_clst_id, xm_node_id) (clamp_max(rate(node_cpu_seconds_total{name="node-agent", mode="idle", xm_entity_type="Node", {filter}}[1m]),1.0)) * 100)) * sum by(xm_clst_id, xm_node_id)(imxc_kubernetes_node_resource_capacity_cpu{{filter}}) / 100'
 WHERE id = 'node_cpu_used';

UPDATE public.metric_meta2
   SET expr = 'avg by (xm_clst_id, xm_node_id, xm_entity_type) (rate(node_cpu_seconds_total{name="node-agent", mode="iowait", xm_entity_type="Node" , {filter}}[1m])) * 100'
 WHERE id = 'node_cpu_iowait';

-- The alert rule expression uses single-quoted label values, so each quote is
-- doubled to escape it inside the SQL string literal.
UPDATE public.alert_rule_meta
   SET expr = '(100 - (avg by (xm_clst_id, xm_node_id, xm_entity_type) (rate(node_cpu_seconds_total{ name=''node-agent'', mode=''idle'', xm_entity_type=''Node'', {filter} }[1m])) * 100))'
 WHERE id = 97;

UPDATE public.agent_install_file_info
   SET name = 'node-agent'
 WHERE id = 4;

COMMIT;

View File

@@ -0,0 +1,68 @@
-- Table added for DeploymentConfig support.
-- A row is identified by (kube_flatting_time, cluster_id, kind, metadata_uid,
-- row_index). Column names are written in camelCase but are not quoted, so
-- PostgreSQL stores them folded to lower case.
CREATE TABLE cmoa_deploymentconfig_base (
    kube_flatting_time                   BIGINT,
    cluster_id                           VARCHAR(255),
    kind                                 VARCHAR(30),
    metadata_uid                         VARCHAR(40),
    row_index                            INT,
    kind_status                          VARCHAR(50),
    metadata_creationTimestamp           VARCHAR(25),
    metadata_name                        TEXT,
    metadata_namespace                   TEXT,
    metadata_resourceVersion             TEXT,
    spec_replicas                        TEXT,
    spec_template_spec_containers_image  TEXT,
    spec_template_metadata_labels        TEXT,
    status_availableReplicas             TEXT,
    status_updatedReplicas               TEXT,
    status_replicas                      TEXT,
    status_unavailableReplicas           TEXT,
    create_time                          TIMESTAMP DEFAULT now(),
    PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index)
);
-- Table added for ReplicationController support.
-- Uses the same composite primary key as cmoa_deploymentconfig_base:
-- (kube_flatting_time, cluster_id, kind, metadata_uid, row_index).
CREATE TABLE cmoa_replicationcontroller_base (
    kube_flatting_time          BIGINT,
    cluster_id                  VARCHAR(255),
    kind                        VARCHAR(30),
    metadata_uid                VARCHAR(40),
    row_index                   INT,
    kind_status                 VARCHAR(50),
    metadata_annotations        TEXT,
    metadata_creationtimestamp  VARCHAR(25),
    metadata_labels             TEXT,
    metadata_name               TEXT,
    metadata_namespace          TEXT,
    metadata_resourceversion    TEXT,
    spec_replicas               TEXT,
    status_availablereplicas    TEXT,
    status_readyreplicas        TEXT,
    status_replicas             TEXT,
    status_observedgeneration   TEXT,
    create_time                 TIMESTAMP DEFAULT now(),
    PRIMARY KEY (kube_flatting_time, cluster_id, kind, metadata_uid, row_index)
);
-- Audit log table: one row per recorded action, keyed by an auto-incrementing
-- id. Every column except target is mandatory; target holds optional JSON.
CREATE TABLE public.audit_log (
    id            BIGSERIAL CONSTRAINT audit_log_pk PRIMARY KEY,
    created_date  TIMESTAMP NOT NULL,
    user_id       VARCHAR   NOT NULL,
    type          VARCHAR   NOT NULL,
    menu_path     VARCHAR   NOT NULL,
    result        BOOLEAN   NOT NULL,
    target        JSONB
);
-- Add metric meta for DeploymentConfig events, followed by three host-level
-- entries (count, alive, dead) built on node_boot_time_seconds{is_host="true"}.
-- host_count is the only one of the four without the {filter} placeholder.
INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'kube_event_deploymentconfig',
    'Kubernetes DeploymentConfig Event',
    'Kubernetes DeploymentConfig Event',
    'count by (xm_clst_id,xm_namespace,entity_type,involved_kind,involved_name,reason,type) (imxc_kubernetes_event_in_last_min{involved_kind="DeploymentConfig", {filter} })',
    'Event',
    'Workload',
    '',
    'true',
    'false',
    'CLST:{{$labels.xm_clst_id}} NS:{{$labels.xm_namespace}} Reason : {{$labels.reason}} Level : {{$labels.type}} '
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'host_count',
    'Host Count',
    'Host Count',
    'count(node_boot_time_seconds{is_host="true"})',
    'Host',
    'System',
    '',
    'true',
    'false',
    'Host Count : {{humanize $value}}'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'host_alive',
    'Host Alive',
    'Host Alive',
    'count(node_boot_time_seconds{is_host="true", {filter}})',
    'Host',
    'System',
    '',
    'true',
    'false',
    'Host Alive : {{$labels.instance}}'
);

INSERT INTO public.metric_meta2
    (id, meta_name, description, expr, resource_type, entity_type, groupby_keys, in_use, anomaly_score, message)
VALUES (
    'host_dead',
    'Host Dead',
    'Host Dead',
    'absent(node_boot_time_seconds{is_host="true", {filter}})',
    'Host',
    'System',
    '',
    'true',
    'false',
    'Host Dead : {{$labels.instance}}'
);

COMMIT;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
-- Give the cloud_user login counter and lock flag explicit defaults.
ALTER TABLE cloud_user
    ALTER COLUMN log_in_count SET DEFAULT 0;

ALTER TABLE cloud_user
    ALTER COLUMN user_lock SET DEFAULT false;

-- Redefine the container restart metric as the increase over a 10-minute window.
-- NOTE(review): the id below is spelled 'cotainer_...'; presumably that matches
-- the row as it was originally inserted -- confirm before correcting the spelling.
UPDATE public.metric_meta2
   SET meta_name     = 'Number of Containers Restart',
       description   = 'Number of Containers Restart (10m)',
       expr          = 'increase(imxc_kubernetes_container_restart_count{{filter}}[10m])',
       resource_type = 'State',
       entity_type   = 'Workload',
       groupby_keys  = null,
       in_use        = true,
       anomaly_score = false,
       message       = 'CLST:{{$labels.xm_clst_id}} CONT:{{$labels.xm_cont_name}} RESTARTCOUNT FOR 10MINUTE:{{humanize $value}}.',
       created_date  = '2021-06-23 09:30:38.646312',
       modified_date = '2021-06-23 09:30:38.646312'
 WHERE id = 'cotainer_restart_count_by_workload';

View File

@@ -0,0 +1,24 @@
---
# Collect the name of every pod in the CloudMOA namespace, one name per line.
# The result must be registered: the debug task below reads pod_list, and it
# fails with an undefined-variable error when the register line is commented out.
- name: 1. Get a list of all pods from the namespace
  command: kubectl -n "{{ cmoa_namespace }}" get pods --no-headers -o custom-columns=":metadata.name"
  register: pod_list
  # kubectl get only reads cluster state, so never report this task as changed.
  changed_when: false

# Print the collected pod names so the operator can check them.
- debug:
    msg: "{{ pod_list.stdout_lines }}"
#- name: 2. Copy psql file in postgres (DDL)
# kubernetes.core.k8s_cp:
# namespace: "{{ cmoa_namespace }}"
# pod: "{{ pod_list.stdout }}"
# remote_path: /tmp/postgres_patch_{{ pg_version }}.psql
# local_path: "{{ role_path }}/files/pg-patch/postgres_patch_{{ pg_version }}.psql"
# with_items: "{{ pg_version }}"
# when: kubernetes_role == 'master'
#
#- name: 3. Execute a command in postgres (DDL)
# kubernetes.core.k8s_exec:
# namespace: "{{ cmoa_namespace }}"
# pod: "{{ pod_list.stdout }}"
# command: bash -c "PGPASSWORD='eorbahrhkswp' && /usr/bin/psql -h 'localhost' -U 'admin' -d 'postgresdb' -f /tmp/postgres_patch_{{ pg_version }}.psql"
# with_items: "{{ pg_version }}"
# when: kubernetes_role == 'master'